diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index c4c650e9b5c2d7c63e194d8899f35ebb3637ffc7..3bbda8df3e769aa927a1f5784f0f799c6f11e3f7 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -301,6 +301,7 @@ extern void kportal_blockallsigs (void); #endif # include <unistd.h> # include <time.h> +# include <limits.h> # include <asm/types.h> # ifndef DEBUG_SUBSYSTEM # define DEBUG_SUBSYSTEM S_UNDEFINED @@ -433,40 +434,6 @@ struct portals_device_userstate * USER LEVEL STUFF BELOW */ -#define PORTALS_CFG_VERSION 0x00010001; - -struct portals_cfg { - __u32 pcfg_version; - __u32 pcfg_command; - - __u32 pcfg_nal; - __u32 pcfg_flags; - - __u32 pcfg_gw_nal; - __u64 pcfg_nid; - __u64 pcfg_nid2; - __u64 pcfg_nid3; - __u32 pcfg_id; - __u32 pcfg_misc; - __u32 pcfg_fd; - __u32 pcfg_count; - __u32 pcfg_size; - __u32 pcfg_wait; - - __u32 pcfg_plen1; /* buffers in userspace */ - char *pcfg_pbuf1; - __u32 pcfg_plen2; /* buffers in userspace */ - char *pcfg_pbuf2; -}; - -#define PCFG_INIT(pcfg, cmd) \ -do { \ - memset(&pcfg, 0, sizeof(pcfg)); \ - pcfg.pcfg_version = PORTALS_CFG_VERSION; \ - pcfg.pcfg_command = (cmd); \ - \ -} while (0) - #define PORTAL_IOCTL_VERSION 0x00010007 #define PING_SYNC 0 #define PING_ASYNC 1 @@ -675,17 +642,10 @@ enum { SCIMACNAL = 6, ROUTER = 7, IBNAL = 8, + CRAY_KB_ERNAL = 9, NAL_ENUM_END_MARKER }; -#ifdef __KERNEL__ -extern ptl_handle_ni_t kqswnal_ni; -extern ptl_handle_ni_t ksocknal_ni; -extern ptl_handle_ni_t kgmnal_ni; -extern ptl_handle_ni_t kibnal_ni; -extern ptl_handle_ni_t kscimacnal_ni; -#endif - #define PTL_NALFMT_SIZE 16 #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) @@ -711,10 +671,6 @@ enum { DEBUG_DAEMON_CONTINUE = 4, }; -/* module.c */ -typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private); -int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private); -int kportal_nal_unregister(int nal); enum cfg_record_type { PORTALS_CFG_TYPE = 1, @@ -722,10 +678,6 @@ enum 
cfg_record_type { }; typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); -int kportal_nal_cmd(struct portals_cfg *); - -ptl_handle_ni_t *kportal_get_ni (int nal); -void kportal_put_ni (int nal); #ifdef __CYGWIN__ # ifndef BITS_PER_LONG diff --git a/lnet/include/linux/kpr.h b/lnet/include/linux/kpr.h index 45b58fe6453e5e7f80249aa48ba0630a08a294af..51d2d2f7abc813052fecb32bcdc1ce3041221390 100644 --- a/lnet/include/linux/kpr.h +++ b/lnet/include/linux/kpr.h @@ -81,21 +81,6 @@ typedef struct { void *kpr_arg; } kpr_router_t; -/* Router's control interface (Kernel Portals Routing Control Interface) */ -typedef const struct { - int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_get_route)(int index, int *gateway_nal, - ptl_nid_t *gateway, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, - int *alive); - int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid, - int alive, time_t when); -} kpr_control_interface_t; - -extern kpr_control_interface_t kpr_control_interface; extern kpr_router_interface_t kpr_router_interface; static inline int diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h index ff517870210bbb60b90d1fec5a736d049c268d93..f33e56ffbb580905f51b61cf28772146408b12fb 100644 --- a/lnet/include/linux/libcfs.h +++ b/lnet/include/linux/libcfs.h @@ -164,6 +164,45 @@ do { \ #define EXIT do { } while (0) #endif +#define PORTALS_CFG_VERSION 0x00010001; + +struct portals_cfg { + __u32 pcfg_version; + __u32 pcfg_command; + + __u32 pcfg_nal; + __u32 pcfg_flags; + + __u32 pcfg_gw_nal; + __u64 pcfg_nid; + __u64 pcfg_nid2; + __u64 pcfg_nid3; + __u32 pcfg_id; + __u32 pcfg_misc; + __u32 pcfg_fd; + __u32 pcfg_count; + __u32 pcfg_size; + __u32 pcfg_wait; + + __u32 pcfg_plen1; /* buffers in userspace */ + char *pcfg_pbuf1; + __u32 pcfg_plen2; /* buffers in userspace */ + char 
*pcfg_pbuf2; +}; + +#define PCFG_INIT(pcfg, cmd) \ +do { \ + memset(&pcfg, 0, sizeof(pcfg)); \ + pcfg.pcfg_version = PORTALS_CFG_VERSION; \ + pcfg.pcfg_command = (cmd); \ + \ +} while (0) + +typedef int (nal_cmd_handler_fn)(struct portals_cfg *, void *); +int libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *arg); +int libcfs_nal_cmd(struct portals_cfg *pcfg); +void libcfs_nal_cmd_unregister(int nal); + struct portal_ioctl_data { __u32 ioc_len; __u32 ioc_version; @@ -196,6 +235,7 @@ struct portal_ioctl_data { char ioc_bulk[0]; }; + #ifdef __KERNEL__ #include <linux/list.h> diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h index 69fa339b9759407bedf6097fbe4c3e93222b3715..6d382bb8abb0d21943c92de8dbbbb18c578705a8 100644 --- a/lnet/include/lnet/api.h +++ b/lnet/include/lnet/api.h @@ -9,9 +9,9 @@ int PtlInit(int *); void PtlFini(void); -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in, - ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid, - ptl_handle_ni_t * interface_out); +int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, + ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *interface_out); int PtlNIInitialized(ptl_interface_t); @@ -37,17 +37,6 @@ int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); #endif -/* - * PtlNIDebug: - * - * This is not an official Portals 3 API call. It is provided - * by the reference implementation to allow the maintainers an - * easy way to turn on and off debugging information in the - * library. Do not use it in code that is not intended for use - * with any version other than the portable reference library. 
- */ -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in); - /* * PtlNIFailNid * @@ -123,8 +112,8 @@ int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout, /* These should be called by users */ int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out); + ptl_eq_handler_t handler, + ptl_handle_eq_t *handle_out); int PtlEQFree(ptl_handle_eq_t eventq_in); int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out); diff --git a/lnet/include/lnet/arg-blocks.h b/lnet/include/lnet/arg-blocks.h index 0be8a3d53ebf41ded3980d0e250ded128c2ba64f..21e30d55ad6fd3ff58f9ea6866047c3a29f189d0 100644 --- a/lnet/include/lnet/arg-blocks.h +++ b/lnet/include/lnet/arg-blocks.h @@ -18,7 +18,7 @@ #define PTL_GETID 1 #define PTL_NISTATUS 2 #define PTL_NIDIST 3 -#define PTL_NIDEBUG 4 +// #define PTL_NIDEBUG 4 #define PTL_MEATTACH 5 #define PTL_MEINSERT 6 // #define PTL_MEPREPEND 7 @@ -205,7 +205,7 @@ typedef struct PtlEQAlloc_in { ptl_size_t count_in; void *base_in; int len_in; - int (*callback_in) (ptl_event_t * event); + ptl_eq_handler_t callback_in; } PtlEQAlloc_in; typedef struct PtlEQAlloc_out { diff --git a/lnet/include/lnet/errno.h b/lnet/include/lnet/errno.h index 499f32bd46c257c9531fba05aeca5e6f722db7a4..a98bfd94e550a9891c536769806c13956036fb6e 100644 --- a/lnet/include/lnet/errno.h +++ b/lnet/include/lnet/errno.h @@ -37,12 +37,11 @@ typedef enum { PTL_MD_NO_UPDATE = 18, PTL_FAIL = 19, - PTL_IOV_TOO_MANY = 20, - PTL_IOV_TOO_SMALL = 21, + PTL_IOV_INVALID = 20, - PTL_EQ_IN_USE = 22, + PTL_EQ_IN_USE = 21, - PTL_MAX_ERRNO = 23 + PTL_MAX_ERRNO = 22 } ptl_err_t; /* If you change these, you must update the string table in api-errno.c */ diff --git a/lnet/include/lnet/internal.h b/lnet/include/lnet/internal.h index 94f4f4812ce28ab05bb89521fc6775d56ae62c16..25778e441d86dc2c959018db990b6dc769892ffe 100644 --- a/lnet/include/lnet/internal.h +++ b/lnet/include/lnet/internal.h @@ -11,25 
+11,10 @@ #include <portals/p30.h> -extern int ptl_init; /* Has the library be initialized */ +extern int ptl_init; /* Has the library been initialized */ extern int ptl_ni_init(void); -extern int ptl_me_init(void); -extern int ptl_md_init(void); -extern int ptl_eq_init(void); - -extern int ptl_me_ni_init(nal_t * nal); -extern int ptl_md_ni_init(nal_t * nal); -extern int ptl_eq_ni_init(nal_t * nal); - extern void ptl_ni_fini(void); -extern void ptl_me_fini(void); -extern void ptl_md_fini(void); -extern void ptl_eq_fini(void); - -extern void ptl_me_ni_fini(nal_t * nal); -extern void ptl_md_ni_fini(nal_t * nal); -extern void ptl_eq_ni_fini(nal_t * nal); static inline ptl_eq_t * ptl_handle2usereq (ptl_handle_eq_t *handle) diff --git a/lnet/include/lnet/lib-dispatch.h b/lnet/include/lnet/lib-dispatch.h index 90ed4f5afb7fc85c82cb65d3407e2bed7cf327d1..610c776de3c0537d3d8c36b5128e44bb081cfcac 100644 --- a/lnet/include/lnet/lib-dispatch.h +++ b/lnet/include/lnet/lib-dispatch.h @@ -18,7 +18,6 @@ extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIDebug(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args, diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 350447e85f39c1782150545621f9032aa0cd9fab..efa929cb07b053589e5298d60f2f46b6ec34e233 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -194,11 +194,11 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) int niov; if ((umd->options & PTL_MD_KIOV) != 0) { - niov = umd->niov; + niov = umd->length; size = offsetof(lib_md_t, md_iov.kiov[niov]); } 
else { niov = ((umd->options & PTL_MD_IOVEC) != 0) ? - umd->niov : 1; + umd->length : 1; size = offsetof(lib_md_t, md_iov.iov[niov]); } @@ -346,8 +346,9 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) return (lh_entry (lh, lib_me_t, me_lh)); } -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); +extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid, + ptl_ni_limits_t *desired_limits, + ptl_ni_limits_t *actual_limits); extern int lib_fini(nal_cb_t * cb); extern void lib_dispatch(nal_cb_t * cb, void *private, int index, void *arg_block, void *ret_block); diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index 350447e85f39c1782150545621f9032aa0cd9fab..efa929cb07b053589e5298d60f2f46b6ec34e233 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -194,11 +194,11 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) int niov; if ((umd->options & PTL_MD_KIOV) != 0) { - niov = umd->niov; + niov = umd->length; size = offsetof(lib_md_t, md_iov.kiov[niov]); } else { niov = ((umd->options & PTL_MD_IOVEC) != 0) ? 
- umd->niov : 1; + umd->length : 1; size = offsetof(lib_md_t, md_iov.iov[niov]); } @@ -346,8 +346,9 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) return (lh_entry (lh, lib_me_t, me_lh)); } -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); +extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid, + ptl_ni_limits_t *desired_limits, + ptl_ni_limits_t *actual_limits); extern int lib_fini(nal_cb_t * cb); extern void lib_dispatch(nal_cb_t * cb, void *private, int index, void *arg_block, void *ret_block); diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 40776a61d15d508567fdd581b7e3ea6d159a24bb..ef618c7bd7e2a2d30842a0d46a5020fb05207dc1 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -169,7 +169,7 @@ struct lib_eq_t { ptl_size_t size; ptl_event_t *base; int eq_refcount; - int (*event_callback) (ptl_event_t * event); + ptl_eq_handler_t event_callback; void *eq_addrkey; }; @@ -245,15 +245,11 @@ typedef struct { * extracted by masking with (PTL_COOKIE_TYPES - 1) */ typedef struct { - int up; - int refcnt; ptl_nid_t nid; ptl_pid_t pid; - int num_nodes; - unsigned int debug; lib_ptl_t tbl; - lib_ac_t ac; lib_counters_t counters; + ptl_ni_limits_t actual_limits; int ni_lh_hash_size; /* size of lib handle hash table */ struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ diff --git a/lnet/include/lnet/lnet.h b/lnet/include/lnet/lnet.h index 577ffabbcac5554de668aba29524f5b8eba469d6..4b8631ddee86564f90a9779dea2d43f6ac052457 100644 --- a/lnet/include/lnet/lnet.h +++ b/lnet/include/lnet/lnet.h @@ -21,45 +21,6 @@ #endif #include <portals/types.h> -#include <portals/nal.h> #include <portals/api.h> -#include <portals/nalids.h> - -/* - * Debugging flags reserved for the Portals reference library. 
- * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. - */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... - */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) #endif diff --git a/lnet/include/lnet/nal.h b/lnet/include/lnet/nal.h index 5b72046cd8b7be04fde8825759e99acde389bfe0..1f925c1685e2e0e1074db42edbc43fa935077bba 100644 --- a/lnet/include/lnet/nal.h +++ b/lnet/include/lnet/nal.h @@ -18,32 +18,29 @@ typedef struct nal_t nal_t; struct nal_t { - ptl_ni_t ni; - int refct; - void *nal_data; - int *timeout; /* for libp30api users */ - int (*forward) (nal_t * nal, int index, /* Function ID */ - void *args, size_t arg_len, void *ret, size_t ret_len); + int nal_refct; + void *nal_data; - int (*shutdown) (nal_t * nal, int interface); + int (*startup) (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *req, ptl_ni_limits_t *actual); + + void (*shutdown) (nal_t *nal); - int (*validate) (nal_t * nal, void *base, size_t extent); + int (*forward) (nal_t *nal, int index, /* Function ID */ + void 
*args, size_t arg_len, void *ret, size_t ret_len); - int (*yield) (nal_t * nal, unsigned long *flags, int milliseconds); + int (*yield) (nal_t *nal, unsigned long *flags, int milliseconds); - void (*lock) (nal_t * nal, unsigned long *flags); + void (*lock) (nal_t *nal, unsigned long *flags); - void (*unlock) (nal_t * nal, unsigned long *flags); + void (*unlock) (nal_t *nal, unsigned long *flags); }; -typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any); -#ifndef PTL_IFACE_DEFAULT -#define PTL_IFACE_DEFAULT (PTL_IFACE_IP) +#ifdef __KERNEL__ +extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal); +extern void ptl_unregister_nal(ptl_interface_t interface); #endif #endif diff --git a/lnet/include/lnet/nalids.h b/lnet/include/lnet/nalids.h index 1568593646f8430231ffd8246daae45bcb0384d6..55a991b70a4cef00fa18e144a218cc34c2f02c0e 100644 --- a/lnet/include/lnet/nalids.h +++ b/lnet/include/lnet/nalids.h @@ -1,6 +1,2 @@ #include "build_check.h" -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 diff --git a/lnet/include/lnet/p30.h b/lnet/include/lnet/p30.h index 577ffabbcac5554de668aba29524f5b8eba469d6..4b8631ddee86564f90a9779dea2d43f6ac052457 100644 --- a/lnet/include/lnet/p30.h +++ b/lnet/include/lnet/p30.h @@ -21,45 +21,6 @@ #endif #include <portals/types.h> -#include <portals/nal.h> #include <portals/api.h> -#include <portals/nalids.h> - -/* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. 
- */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... - */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) #endif diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h index 902db764f072798b65ad680ddab0f738537770f5..51b557cc44b177237b786d1f7fdf328dd9cd0c55 100644 --- a/lnet/include/lnet/types.h +++ b/lnet/include/lnet/types.h @@ -41,7 +41,6 @@ typedef __u64 ptl_hdr_data_t; typedef __u32 ptl_size_t; #define PTL_TIME_FOREVER (-1) -#define PTL_EQ_HANDLER_NONE NULL typedef struct { unsigned long nal_idx; /* which network interface */ @@ -80,12 +79,6 @@ typedef enum { PTL_INS_AFTER } ptl_ins_pos_t; -typedef struct { - struct page *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; -} ptl_kiov_t; - typedef struct { void *start; ptl_size_t length; @@ -94,7 +87,6 @@ typedef struct { unsigned int options; void *user_ptr; ptl_handle_eq_t eventq; - unsigned int niov; } ptl_md_t; /* Options for the MD structure */ @@ -112,9 +104,19 @@ typedef struct { /* For compatibility with Cray Portals */ #define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 +#define PTL_MD_PHYS 0 #define PTL_MD_THRESH_INF (-1) +/* NB lustre portals uses struct iovec internally! 
*/ +typedef struct iovec ptl_md_iovec_t; + +typedef struct { + struct page *kiov_page; + unsigned int kiov_len; + unsigned int kiov_offset; +} ptl_kiov_t; + typedef enum { PTL_EVENT_GET_START, PTL_EVENT_GET_END, @@ -168,6 +170,9 @@ typedef enum { PTL_NOACK_REQ } ptl_ack_req_t; +typedef void (*ptl_eq_handler_t)(ptl_event_t *event); +#define PTL_EQ_HANDLER_NONE NULL + typedef struct { volatile ptl_seq_t sequence; ptl_size_t size; @@ -180,11 +185,14 @@ typedef struct { } ptl_ni_t; typedef struct { - int max_match_entries; /* max number of match entries */ - int max_mem_descriptors; /* max number of memory descriptors */ - int max_event_queues; /* max number of event queues */ - int max_atable_index; /* maximum access control list table index */ - int max_ptable_index; /* maximum portals table index */ + int max_mes; + int max_mds; + int max_eqs; + int max_ac_index; + int max_pt_index; + int max_md_iovecs; + int max_me_list; + int max_getput_md; } ptl_ni_limits_t; /* @@ -202,4 +210,7 @@ typedef enum { typedef int ptl_sr_value_t; +typedef int ptl_interface_t; +#define PTL_IFACE_DEFAULT (-1) + #endif diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h index 995559944a5b9e2a842da3174840e76547da5543..e48552e7f214a10f78cb382215496fe6563baea0 100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ b/lnet/klnds/gmlnd/gmlnd.h @@ -190,7 +190,6 @@ typedef struct _gmnal_rxtwe { #define NRXTHREADS 10 /* max number of receiver threads */ typedef struct _gmnal_data_t { - int refcnt; spinlock_t cb_lock; spinlock_t stxd_lock; struct semaphore stxd_token; @@ -309,9 +308,12 @@ extern gmnal_data_t *global_nal_data; /* * API NAL */ +int gmnal_api_startup(nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); + int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); -int gmnal_api_shutdown(nal_t *, int); +void gmnal_api_shutdown(nal_t *); int gmnal_api_validate(nal_t *, void *, size_t); @@ -323,14 +325,13 @@ void gmnal_api_unlock(nal_t *, unsigned long *); #define 
GMNAL_INIT_NAL(a) do { \ + a->startup = gmnal_api_startup; \ a->forward = gmnal_api_forward; \ a->shutdown = gmnal_api_shutdown; \ - a->validate = NULL; \ a->yield = gmnal_api_yield; \ a->lock = gmnal_api_lock; \ a->unlock = gmnal_api_unlock; \ a->timeout = NULL; \ - a->refct = 1; \ a->nal_data = NULL; \ } while (0) @@ -373,7 +374,7 @@ void gmnal_cb_sti(nal_cb_t *, unsigned long *); int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); -nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); +int gmnal_init(void); void gmnal_fini(void); diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c index 338d75cd094d1deddad1789f372465a618b5f104..7c94f937b56b8d79aca59a0420c746aa8777bdeb 100644 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ b/lnet/klnds/gmlnd/gmlnd_api.c @@ -123,18 +123,51 @@ gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, /* * gmnal_api_shutdown + * nal_refct == 0 => called on last matching PtlNIFini() * Close down this interface and free any resources associated with it * nal_t nal our nal to shutdown */ -int +void gmnal_api_shutdown(nal_t *nal, int interface) { + gmnal_data_t *nal_data; + nal_cb_t *nal_cb; - gmnal_data_t *nal_data = nal->nal_data; - + if (nal->nal_refct != 0) + return; + CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); - return(PTL_OK); + LASSERT(nal == global_nal_data->nal); + nal_data = nal->nal_data; + LASSERT(nal_data == global_nal_data); + nal_cb = nal_data->nal_cb; + + /* Stop portals calling our ioctl handler */ + libcfs_nal_cmd_unregister(GMNAL); + + /* XXX for shutdown "under fire" we probably need to set a shutdown + * flag so when lib calls us we fail immediately and dont queue any + * more work but our threads can still call into lib OK. 
THEN + * shutdown our threads, THEN lib_fini() */ + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_txd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + if (nal_data->sysctl) + unregister_sysctl_table (nal_data->sysctl); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + + global_nal_data = NULL; + PORTAL_MODULE_UNUSE; } @@ -210,57 +243,54 @@ gmnal_api_unlock(nal_t *nal, unsigned long *flags) } -nal_t * -gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t rpid) +int +gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - nal_t *nal = NULL; nal_cb_t *nal_cb = NULL; gmnal_data_t *nal_data = NULL; gmnal_srxd_t *srxd = NULL; gm_status_t gm_status; unsigned int local_nid = 0, global_nid = 0; - ptl_nid_t portals_nid; - ptl_pid_t portals_pid = 0; + ptl_process_id_t process_id; + + if (nal->nal_refct != 0) { + if (actual_limits != NULL) { + nal_data = (gmnal_data_t *)nal->nal_data; + nal_cb = nal_data->nal_cb; + *actual_limits = nal->_cb->ni.actual_limits; + return (PTL_OK); + } + /* Called on first PtlNIInit() */ - CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], " - "ac_size[%d]\n", interface, ptl_size, ac_size); + CDEBUG(D_TRACE, "startup\n"); + LASSERT(global_nal_data == NULL); PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t)); if (!nal_data) { CDEBUG(D_ERROR, "can't get memory\n"); - return(NULL); + return(PTL_NO_SPACE); } memset(nal_data, 0, sizeof(gmnal_data_t)); /* * set the small message buffer size */ - nal_data->refcnt = 1; CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data); CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size); - PORTAL_ALLOC(nal, sizeof(nal_t)); - if (!nal) { - PORTAL_FREE(nal_data, 
sizeof(gmnal_data_t)); - return(NULL); - } - memset(nal, 0, sizeof(nal_t)); - CDEBUG(D_INFO, "Allocd and reset nal[%p]\n", nal); - PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); if (!nal_cb) { - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - return(NULL); + return(PTL_NO_SPACE); } memset(nal_cb, 0, sizeof(nal_cb_t)); CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb); - GMNAL_INIT_NAL(nal); GMNAL_INIT_NAL_CB(nal_cb); /* * String them all together @@ -280,10 +310,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, CDEBUG(D_INFO, "Calling gm_init\n"); if (gm_init() != GM_SUCCESS) { CDEBUG(D_ERROR, "call to gm_init failed\n"); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -326,10 +355,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, GMNAL_GM_LOCK(nal_data); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -344,10 +372,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -374,10 +401,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } gmnal_start_kernel_threads(nal_data); @@ -407,10 +433,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); 
GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } nal_data->gm_local_nid = local_nid; CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid); @@ -428,10 +453,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid); nal_data->gm_global_nid = global_nid; @@ -440,13 +464,15 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, /* pid = gm_getpid(); */ - CDEBUG(D_INFO, "portals_pid is [%u]\n", portals_pid); - portals_nid = (unsigned long)global_nid; - CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", portals_nid); + process_id.pid = 0; + process_id.nid = global_nid; + + CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); + CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid); CDEBUG(D_PORTALS, "calling lib_init\n"); - if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, - ac_size) != PTL_OK) { + if (lib_init(nal_cb, process_id, + requested_limits, actual_limits) != PTL_OK) { CDEBUG(D_ERROR, "lib_init failed\n"); gmnal_stop_rxthread(nal_data); gmnal_stop_ctthread(nal_data); @@ -456,22 +482,68 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } + + if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, nal->nal_data) != 0) { + CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n"); + + /* XXX these cleanup cases should be restructured to + * minimise 
duplication... */ + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_txd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(PTL_FAIL); + } + + /* might be better to initialise this at module load rather than in + * NAL startup */ nal_data->sysctl = NULL; nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0); CDEBUG(D_INFO, "gmnal_init finished\n"); global_nal_data = nal->nal_data; - return(nal); + + /* no unload now until shutdown */ + PORTAL_MODULE_USE; + + return(PTL_OK); } +nal_t the_gm_nal; + +/* + * Called when module loaded + */ +int gmnal_init(void) +{ + int rc; + + memset(&the_gm_nal, 0, sizeof(nal_t)); + CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal); + GMNAL_INIT_NAL(&the_gm_nal); + rc = ptl_register_nal(GMNAL, &the_gm_nal); + if (rc != PTL_OK) + CERROR("Can't register GMNAL: %d\n", rc); + + return (rc); +} + + /* * Called when module removed @@ -484,20 +556,7 @@ void gmnal_fini() CDEBUG(D_TRACE, "gmnal_fini\n"); - PtlNIFini(kgmnal_ni); - lib_fini(nal_cb); + LASSERT(global_nal_data == NULL); - gmnal_stop_rxthread(nal_data); - gmnal_stop_ctthread(nal_data); - gmnal_free_txd(nal_data); - gmnal_free_srxd(nal_data); - GMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - GMNAL_GM_UNLOCK(nal_data); - if (nal_data->sysctl) - unregister_sysctl_table (nal_data->sysctl); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + ptl_unregister_nal(GMNAL); } diff --git a/lnet/klnds/gmlnd/gmlnd_module.c b/lnet/klnds/gmlnd/gmlnd_module.c index 31f6819dabd2de3b5c55198d04f7f93f44f52fc1..278230e66edd8c14dc5ce602d33d3893a55d98a0 100644 --- a/lnet/klnds/gmlnd/gmlnd_module.c +++ b/lnet/klnds/gmlnd/gmlnd_module.c @@ -32,9 +32,6 @@ int 
num_rx_threads = -1; int num_stxds = 5; int gm_port = 4; -ptl_handle_ni_t kgmnal_ni; - - int gmnal_cmd(struct portals_cfg *pcfg, void *private) { @@ -92,26 +89,15 @@ gmnal_load(void) CDEBUG(D_INFO, "Calling gmnal_init\n"); - status = PtlNIInit(gmnal_init, 32, 4, 0, &kgmnal_ni); + statud = gmnal_init(); if (status == PTL_OK) { - CDEBUG(D_INFO, "Portals GMNAL initialised ok kgmnal_ni\n"); + CDEBUG(D_INFO, "Portals GMNAL initialised ok\n"); } else { CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n"); - return(1); + return(-ENODEV); } - CDEBUG(D_INFO, "Calling kportal_nal_register\n"); - /* - * global_nal_data is set by gmnal_init - */ - if (kportal_nal_register(GMNAL, &gmnal_cmd, global_nal_data) != 0) { - CDEBUG(D_INFO, "kportal_nal_register failed\n"); - return(1); - } - - CDEBUG(D_INFO, "Calling PORTAL_SYMBOL_REGISTER\n"); - PORTAL_SYMBOL_REGISTER(kgmnal_ni); CDEBUG(D_INFO, "This is the end of the gmnal init routine"); @@ -122,11 +108,7 @@ gmnal_load(void) static void __exit gmnal_unload(void) { - - kportal_nal_unregister(GMNAL); - PORTAL_SYMBOL_UNREGISTER(kgmnal_ni); gmnal_fini(); - global_nal_data = NULL; return; } @@ -135,8 +117,6 @@ module_init(gmnal_load); module_exit(gmnal_unload); -EXPORT_SYMBOL(kgmnal_ni); - MODULE_PARM(gmnal_small_msg_size, "i"); MODULE_PARM(num_rx_threads, "i"); MODULE_PARM(num_stxds, "i"); diff --git a/lnet/klnds/iblnd/ibnal.c b/lnet/klnds/iblnd/ibnal.c index 02beca7fb3dbb285fc7bbc6e036c786bb0c10b3e..86c2a6355206923d845a8321c343316a9ba7eb7b 100644 --- a/lnet/klnds/iblnd/ibnal.c +++ b/lnet/klnds/iblnd/ibnal.c @@ -235,11 +235,6 @@ kibnal_init(int interface, // no use here kibnal_data_t *nal_data = NULL; int rc; - unsigned int nnids = 1; // number of nids - // do we know how many nodes are in this - // system related to this kib_nid - // - CDEBUG(D_NET, "kibnal_init:calling lib_init with nid 0x%u\n", kibnal_data.kib_nid); @@ -252,7 +247,6 @@ kibnal_init(int interface, // no use here rc = lib_init(&kibnal_lib, kibnal_data.kib_nid, 0, // 
process id is set as 0 - nnids, ptl_size, ac_size); @@ -2034,16 +2028,13 @@ kibnal_initialize(void) CDEBUG(D_PORTALS, "kibnal_initialize: Enter kibnal_initialize\n"); // set api functional pointers + kibnal_api.startup = kibnal_startup; kibnal_api.forward = kibnal_forward; kibnal_api.shutdown = kibnal_shutdown; kibnal_api.yield = kibnal_yield; - kibnal_api.validate = NULL; /* our api validate is a NOOP */ kibnal_api.lock = kibnal_lock; kibnal_api.unlock = kibnal_unlock; kibnal_api.nal_data = &kibnal_data; // this is so called private data - kibnal_api.refct = 1; - kibnal_api.timeout = NULL; - kibnal_lib.nal_data = &kibnal_data; memset(&kibnal_data, 0, sizeof(kibnal_data)); diff --git a/lnet/klnds/qswlnd/Makefile.in b/lnet/klnds/qswlnd/Makefile.in index 7defd133256955ddf951ecaeb4cbfb4fc1e08131..17b17789ea11de01b5af7deb35f31e487e8a48ef 100644 --- a/lnet/klnds/qswlnd/Makefile.in +++ b/lnet/klnds/qswlnd/Makefile.in @@ -1,6 +1,6 @@ MODULES := kqswnal kqswnal-objs := qswnal.o qswnal_cb.o -EXTRA_CFLAGS := @QSWCPPFLAGS@ +EXTRA_CFLAGS := @QSWCPPFLAGS@ -I /usr/include @INCLUDE_RULES@ diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index aeadd318638bb0e0ba50d9064978a0a169b73615..5359ef7590da6af80b4f62fdd27afaa355efd2ec 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -24,9 +24,10 @@ #include "qswnal.h" -ptl_handle_ni_t kqswnal_ni; nal_t kqswnal_api; kqswnal_data_t kqswnal_data; +ptl_handle_ni_t kqswnal_ni; +kqswnal_tunables_t kqswnal_tunables; kpr_nal_interface_t kqswnal_router_interface = { kprni_nalid: QSWNAL, @@ -43,10 +44,7 @@ kpr_nal_interface_t kqswnal_router_interface = { static ctl_table kqswnal_ctl_table[] = { {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets", - &kqswnal_data.kqn_optimized_gets, sizeof (int), - 0644, NULL, &proc_dointvec}, - {QSWNAL_SYSCTL_COPY_SMALL_FWD, "copy_small_fwd", - &kqswnal_data.kqn_copy_small_fwd, sizeof (int), + &kqswnal_tunables.kqn_optimized_gets, sizeof (int), 0644, NULL, &proc_dointvec}, 
{0} }; @@ -100,15 +98,6 @@ kqswnal_unlock(nal_t *nal, unsigned long *flags) nal_cb->cb_sti(nal_cb,flags); } -static int -kqswnal_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "shutdown\n"); - - LASSERT (nal == &kqswnal_api); - return (0); -} - static int kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds) { @@ -148,20 +137,6 @@ kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds) return (milliseconds); } -static nal_t * -kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t requested_pid) -{ - ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); - int nnids = kqswnal_data.kqn_nnodes; - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids); - - lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); - - return (&kqswnal_api); -} - int kqswnal_get_tx_desc (struct portals_cfg *pcfg) { @@ -219,11 +194,20 @@ kqswnal_cmd (struct portals_cfg *pcfg, void *private) } } -void __exit -kqswnal_finalise (void) +static void +kqswnal_shutdown(nal_t *nal) { unsigned long flags; - int do_ptl_fini = 0; + int do_lib_fini = 0; + + /* NB The first ref was this module! 
*/ + if (nal->nal_refct != 0) { + PORTAL_MODULE_UNUSE; + return; + } + + CDEBUG (D_NET, "shutdown\n"); + LASSERT (nal == &kqswnal_api); switch (kqswnal_data.kqn_init) { @@ -231,16 +215,11 @@ kqswnal_finalise (void) LASSERT (0); case KQN_INIT_ALL: -#if CONFIG_SYSCTL - if (kqswnal_data.kqn_sysctl != NULL) - unregister_sysctl_table (kqswnal_data.kqn_sysctl); -#endif - PORTAL_SYMBOL_UNREGISTER (kqswnal_ni); - kportal_nal_unregister(QSWNAL); + libcfs_nal_cmd_unregister(QSWNAL); /* fall through */ - case KQN_INIT_PTL: - do_ptl_fini = 1; + case KQN_INIT_LIB: + do_lib_fini = 1; /* fall through */ case KQN_INIT_DATA: @@ -353,10 +332,8 @@ kqswnal_finalise (void) kpr_deregister (&kqswnal_data.kqn_router); - if (do_ptl_fini) { - PtlNIFini (kqswnal_ni); + if (do_lib_fini) lib_fini (&kqswnal_lib); - } /**********************************************************************/ /* Unmap message buffers and free all descriptors and buffers @@ -477,7 +454,9 @@ kqswnal_finalise (void) } static int __init -kqswnal_initialise (void) +kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { #if MULTIRAIL_EKC EP_RAILMASK all_rails = EP_RAILMASK_ALL; @@ -487,22 +466,21 @@ kqswnal_initialise (void) int rc; int i; int elan_page_idx; + ptl_process_id_t my_process_id; int pkmem = atomic_read(&portal_kmemory); + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = kqswnal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } + LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING); CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); - kqswnal_api.forward = kqswnal_forward; - kqswnal_api.shutdown = kqswnal_shutdown; - kqswnal_api.yield = kqswnal_yield; - kqswnal_api.validate = NULL; /* our api validate is a NOOP */ - kqswnal_api.lock = kqswnal_lock; - kqswnal_api.unlock = kqswnal_unlock; - kqswnal_api.nal_data = &kqswnal_data; - - 
kqswnal_lib.nal_data = &kqswnal_data; - memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success)); memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed)); #if MULTIRAIL_EKC @@ -513,9 +491,6 @@ kqswnal_initialise (void) /* ensure all pointers NULL etc */ memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - kqswnal_data.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; - kqswnal_data.kqn_copy_small_fwd = KQSW_COPY_SMALL_FWD; - kqswnal_data.kqn_cb = &kqswnal_lib; INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); @@ -537,18 +512,19 @@ kqswnal_initialise (void) /* pointers/lists/locks initialised */ kqswnal_data.kqn_init = KQN_INIT_DATA; - + #if MULTIRAIL_EKC kqswnal_data.kqn_ep = ep_system(); if (kqswnal_data.kqn_ep == NULL) { CERROR("Can't initialise EKC\n"); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) { CERROR("Can't get elan ID\n"); - kqswnal_finalise(); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } #else /**********************************************************************/ @@ -558,7 +534,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_ep == NULL) { CERROR ("Can't get elan device 0\n"); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } #endif @@ -573,8 +550,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_eptx == NULL) { CERROR ("Can't allocate transmitter\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /**********************************************************************/ @@ -586,8 +563,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_eprx_small == NULL) { CERROR ("Can't install small msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep, @@ -596,8 +573,8 @@ kqswnal_initialise (void) 
if (kqswnal_data.kqn_eprx_large == NULL) { CERROR ("Can't install large msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /**********************************************************************/ @@ -611,8 +588,8 @@ kqswnal_initialise (void) EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve tx dma space\n"); - kqswnal_finalise(); - return (-ENOMEM); + kqswnal_shutdown(&kqswnal_api); + return (PTL_NO_SPACE); } #else dmareq.Waitfn = DDI_DMA_SLEEP; @@ -626,8 +603,8 @@ kqswnal_initialise (void) if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } #endif /**********************************************************************/ @@ -640,8 +617,8 @@ kqswnal_initialise (void) EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve rx dma space\n"); - kqswnal_finalise(); - return (-ENOMEM); + kqswnal_shutdown(&kqswnal_api); + return (PTL_NO_SPACE); } #else dmareq.Waitfn = DDI_DMA_SLEEP; @@ -656,8 +633,8 @@ kqswnal_initialise (void) if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } #endif /**********************************************************************/ @@ -667,8 +644,8 @@ kqswnal_initialise (void) sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); if (kqswnal_data.kqn_txds == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /* clear flags, null pointers etc */ @@ -683,8 +660,8 @@ kqswnal_initialise (void) PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); if (ktx->ktx_buffer == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /* Map pre-allocated buffer NOW, to save latency 
on transmit */ @@ -720,8 +697,8 @@ kqswnal_initialise (void) sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE)); if (kqswnal_data.kqn_rxds == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */ @@ -755,8 +732,8 @@ kqswnal_initialise (void) struct page *page = alloc_page(GFP_KERNEL); if (page == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } krx->krx_kiov[j].kiov_page = page; @@ -800,15 +777,19 @@ kqswnal_initialise (void) /**********************************************************************/ /* Network interface ready to initialise */ - rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni); - if (rc != 0) + my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); + my_process_id.pid = 0; + + rc = lib_init(&kqswnal_lib, my_process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { - CERROR ("PtlNIInit failed %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); + CERROR ("lib_init failed %d\n", rc); + kqswnal_shutdown (&kqswnal_api); + return (rc); } - kqswnal_data.kqn_init = KQN_INIT_PTL; + kqswnal_data.kqn_init = KQN_INIT_LIB; /**********************************************************************/ /* Queue receives, now that it's OK to run their completion callbacks */ @@ -829,8 +810,8 @@ kqswnal_initialise (void) if (rc != EP_SUCCESS) { CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } } @@ -842,8 +823,8 @@ kqswnal_initialise (void) if (rc != 0) { CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_finalise (); - return (rc); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } } @@ -852,19 +833,13 @@ kqswnal_initialise (void) rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); CDEBUG(D_NET, "Can't 
initialise routing interface (rc = %d): not routing\n",rc); - rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL); + rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - kqswnal_finalise (); - return (rc); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } -#if CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - kqswnal_data.kqn_sysctl = register_sysctl_table (kqswnal_top_ctl_table, 0); -#endif - - PORTAL_SYMBOL_REGISTER(kqswnal_ni); kqswnal_data.kqn_init = KQN_INIT_ALL; printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d " @@ -873,9 +848,61 @@ kqswnal_initialise (void) kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled", pkmem); - return (0); + return (PTL_OK); } +void __exit +kqswnal_finalise (void) +{ +#if CONFIG_SYSCTL + if (kqswnal_tunables.kqn_sysctl != NULL) + unregister_sysctl_table (kqswnal_tunables.kqn_sysctl); +#endif + PtlNIFini(kqswnal_ni); + + ptl_unregister_nal(QSWNAL); +} + +static int __init +kqswnal_initialise (void) +{ + int rc; + + kqswnal_api.startup = kqswnal_startup; + kqswnal_api.shutdown = kqswnal_shutdown; + kqswnal_api.forward = kqswnal_forward; + kqswnal_api.yield = kqswnal_yield; + kqswnal_api.lock = kqswnal_lock; + kqswnal_api.unlock = kqswnal_unlock; + kqswnal_api.nal_data = &kqswnal_data; + + kqswnal_lib.nal_data = &kqswnal_data; + + /* Initialise dynamic tunables to defaults once only */ + kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; + + rc = ptl_register_nal(QSWNAL, &kqswnal_api); + if (rc != PTL_OK) { + CERROR("Can't register QSWNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways, and the workaround for 'EKC blocks forever until + * the service is active' want the NAL started up at module load + * time... 
*/ + rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(QSWNAL); + return (-ENODEV); + } + +#if CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + kqswnal_tunables.kqn_sysctl = + register_sysctl_table (kqswnal_top_ctl_table, 0); +#endif + return (0); +} MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01"); @@ -883,5 +910,3 @@ MODULE_LICENSE("GPL"); module_init (kqswnal_initialise); module_exit (kqswnal_finalise); - -EXPORT_SYMBOL (kqswnal_ni); diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index 93bf584477ad06269011a273ae56539b4e20f47b..1cd42db9396b323de5dd83452925d42f3644d63b 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -74,6 +74,7 @@ #include <linux/kpr.h> #include <portals/p30.h> #include <portals/lib-p30.h> +#include <portals/nal.h> #define KQSW_CHECKSUM 0 #if KQSW_CHECKSUM @@ -192,18 +193,21 @@ typedef struct #define KTX_FORWARDING 2 /* routing a packet */ #define KTX_GETTING 3 /* local optimised get */ +typedef struct +{ + /* dynamic tunables... */ + int kqn_optimized_gets; /* optimized GETs? */ +#if CONFIG_SYSCTL + struct ctl_table_header *kqn_sysctl; /* sysctl interface */ +#endif +} kqswnal_tunables_t; + typedef struct { char kqn_init; /* what's been initialised */ char kqn_shuttingdown; /* I'm trying to shut down */ atomic_t kqn_nthreads; /* # threads running */ - int kqn_optimized_gets; /* optimized GETs? */ - int kqn_copy_small_fwd; /* fwd small msgs from pre-allocated buffer? 
*/ - -#if CONFIG_SYSCTL - struct ctl_table_header *kqn_sysctl; /* sysctl interface */ -#endif kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */ kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ @@ -247,12 +251,13 @@ typedef struct /* kqn_init state */ #define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ #define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 +#define KQN_INIT_LIB 2 #define KQN_INIT_ALL 3 -extern nal_cb_t kqswnal_lib; -extern nal_t kqswnal_api; -extern kqswnal_data_t kqswnal_data; +extern nal_cb_t kqswnal_lib; +extern nal_t kqswnal_api; +extern kqswnal_tunables_t kqswnal_tunables; +extern kqswnal_data_t kqswnal_data; /* global pre-prepared replies to keep off the stack */ extern EP_STATUSBLK kqswnal_rpc_success; diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 577c578018f903ebb5ab9ec6689f812ee504030b..f92f97474d624d758f1ae45d5a3d440ba074b3a3 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -1027,7 +1027,7 @@ kqswnal_sendmsg (nal_cb_t *nal, memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum)); #endif - if (kqswnal_data.kqn_optimized_gets && + if (kqswnal_tunables.kqn_optimized_gets && type == PTL_MSG_GET && /* doing a GET */ nid == targetnid) { /* not forwarding */ lib_md_t *md = libmsg->md; diff --git a/lnet/klnds/scimaclnd/scimacnal.c b/lnet/klnds/scimaclnd/scimacnal.c index 35de6ebcf77e718850867d94e38d7eeb04cf08bc..e77bd8ee278dc69428c1e5d1525825e6907dfab5 100644 --- a/lnet/klnds/scimaclnd/scimacnal.c +++ b/lnet/klnds/scimaclnd/scimacnal.c @@ -26,7 +26,6 @@ #include "scimacnal.h" -ptl_handle_ni_t kscimacnal_ni; nal_t kscimacnal_api; kscimacnal_data_t kscimacnal_data; @@ -101,10 +100,34 @@ static void kscimacnal_unlock(nal_t *nal, unsigned long *flags) } -static int kscimacnal_shutdown(nal_t *nal, int ni) +static void kscimacnal_shutdown(nal_t *nal, int ni) { LASSERT (nal == &kscimacnal_api); - return 0; + LASSERT 
(kscimacnal_data.ksci_init); + + if (nal->nal_refct != 0) + return; + + /* Called on last matching PtlNIFini() */ + + /* FIXME: How should the shutdown procedure really look? + */ + kscimacnal_data.ksci_shuttingdown=1; + + /* Stop handling ioctls */ + libcfs_nal_cmd_unregister(SCIMACNAL); + + mac_finish(kscimacnal_data.ksci_machandle); + + /* finalise lib after net shuts up */ + lib_fini(&kscimacnal_lib); + + kscimacnal_data.ksci_init = 0; + + /* Allow unload */ + PORTAL_MODULE_UNUSE; + + return; } @@ -123,56 +146,26 @@ static void kscimacnal_yield( nal_t *nal, unsigned long *flags, int milliseconds } -static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - int nnids = 512; /* FIXME: Need ScaMac funktion to get #nodes */ - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" nnids %d\n", kscimacnal_data.ksci_nid, nnids); - lib_init(&kscimacnal_lib, kscimacnal_data.ksci_nid, 0, nnids,ptl_size, ac_size); - return &kscimacnal_api; -} - - -/* Called by kernel at module unload time */ -static void /*__exit*/ -kscimacnal_finalize(void) -{ - /* FIXME: How should the shutdown procedure really look? 
*/ - kscimacnal_data.ksci_shuttingdown=1; - - PORTAL_SYMBOL_UNREGISTER(kscimacnal_ni); - - PtlNIFini(kscimacnal_ni); - lib_fini(&kscimacnal_lib); - - mac_finish(kscimacnal_data.ksci_machandle); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - - -/* Called by kernel at module insertion time */ -static int __init -kscimacnal_initialize(void) +static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { int rc; - unsigned long nid=0; + mac_physaddr_t mac_physaddr; + ptl_process_id_t process_id; mac_handle_t *machandle = NULL; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = kscimacnal_lib.ni.actual_limits; + return (PTL_OK); + } - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kscimacnal_api.forward = kscimacnal_forward; - kscimacnal_api.shutdown = kscimacnal_shutdown; - kscimacnal_api.yield = kscimacnal_yield; - kscimacnal_api.validate = NULL; /* our api validate is a NOOP */ - kscimacnal_api.lock= kscimacnal_lock; - kscimacnal_api.unlock= kscimacnal_unlock; - kscimacnal_api.nal_data = &kscimacnal_data; + /* Called on first PtlNIInit(SCIMACNAL) */ + LASSERT (nal == kscimacnal_api); + LASSERT (!kscimacnal_data.ksci_init); + kscimacnal_lib.nal_data = &kscimacnal_data; memset(&kscimacnal_data, 0, sizeof(kscimacnal_data)); @@ -188,7 +181,7 @@ kscimacnal_initialize(void) if(!machandle) { CERROR("mac_init() failed\n"); - return -1; + return PTL_FAIL; } kscimacnal_data.ksci_machandle = machandle; @@ -199,45 +192,88 @@ kscimacnal_initialize(void) mac_get_mtusize(machandle), SCIMACNAL_MTU); CERROR("Consult README.scimacnal for more information\n"); mac_finish(machandle); - return -1; + return PTL_FAIL; } /* Get the node ID */ /* mac_get_physaddrlen() is a function instead of define, sigh */ - LASSERT(mac_get_physaddrlen(machandle) <= sizeof(nid)); - if(mac_get_physaddr(machandle, (mac_physaddr_t *) &nid)) 
{ + LASSERT(mac_get_physaddrlen(machandle) <= sizeof(mac_physaddr)); + if(mac_get_physaddr(machandle, &mac_physaddr)) { CERROR("mac_get_physaddr() failed\n"); mac_finish(machandle); - return -1; + return PTL_FAIL; } - nid = ntohl(nid); - kscimacnal_data.ksci_nid = nid; + kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr)); + process_id.pid = 0; + process_id.nid = kscimacnal_data.ksci_nid; - /* Initialize Network Interface */ - /* FIXME: What do the magic numbers mean? Documentation anyone? */ - rc = PtlNIInit(kscimacnal_init, 32, 4, 0, &kscimacnal_ni); - if (rc) { + CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", + kscimacnal_data.ksci_nid); + + rc = lib_init(&kscimacnal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { CERROR("PtlNIInit failed %d\n", rc); mac_finish(machandle); - return (-ENOMEM); + return (rc); } /* Init command interface */ - rc = kportal_nal_register (SCIMACNAL, &kscimacnal_cmd, NULL); + rc = libcfs_nal_cmd_register (SCIMACNAL, &kscimacnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - PtlNIFini(kscimacnal_ni); + lib_fini(&kscimacnal_lib); mac_finish(machandle); - return (rc); + return (PTL_FAIL); } - - PORTAL_SYMBOL_REGISTER(kscimacnal_ni); - /* We're done now, it's OK for the RX callback to do stuff */ kscimacnal_data.ksci_init = 1; + /* Prevent unload before matching PtlNIFini() */ + PORTAL_MODULE_USE; + + return (PTL_OK); +} + + +/* Called by kernel at module unload time */ +static void /*__exit*/ +kscimacnal_finalize(void) +{ + LASSERT (!kscimacnal_data.ksci_init); + + ptl_unregister_nal(SCIMACNAL); + + CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); + + return; +} + + +/* Called by kernel at module insertion time */ +static int __init +kscimacnal_initialize(void) +{ + int rc; + + CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); + + kscimacnal_api.startup = kscimacnal_startup; + kscimacnal_api.forward = 
kscimacnal_forward; + kscimacnal_api.shutdown = kscimacnal_shutdown; + kscimacnal_api.yield = kscimacnal_yield; + kscimacnal_api.lock= kscimacnal_lock; + kscimacnal_api.unlock= kscimacnal_unlock; + kscimacnal_api.nal_data = &kscimacnal_data; + + rc = ptl_register_nal(SCIMACNAL, &kscimacnal_api); + if (rc != PTL_OK) { + CERROR("Can't register SCIMACNAL: %d\n", rc); + return (-ENODEV); + } + return 0; } diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index d874a6cf58cd1a47faf4e1d4f7441c36d92531f2..32bbbec5262d2597ea91ee5c23c062a65b91a743 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -25,13 +25,10 @@ #include "socknal.h" +nal_t ksocknal_api; +ksock_nal_data_t ksocknal_data; ptl_handle_ni_t ksocknal_ni; -static nal_t ksocknal_api; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -ksock_nal_data_t ksocknal_data; -#else -static ksock_nal_data_t ksocknal_data; -#endif +ksock_tunables_t ksocknal_tunables; kpr_nal_interface_t ksocknal_router_interface = { kprni_nalid: SOCKNAL, @@ -40,6 +37,7 @@ kpr_nal_interface_t ksocknal_router_interface = { kprni_notify: ksocknal_notify, }; +#ifdef CONFIG_SYSCTL #define SOCKNAL_SYSCTL 200 #define SOCKNAL_SYSCTL_TIMEOUT 1 @@ -50,21 +48,21 @@ kpr_nal_interface_t ksocknal_router_interface = { static ctl_table ksocknal_ctl_table[] = { {SOCKNAL_SYSCTL_TIMEOUT, "timeout", - &ksocknal_data.ksnd_io_timeout, sizeof (int), + &ksocknal_tunables.ksnd_io_timeout, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", - &ksocknal_data.ksnd_eager_ack, sizeof (int), + &ksocknal_tunables.ksnd_eager_ack, sizeof (int), 0644, NULL, &proc_dointvec}, #if SOCKNAL_ZC {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", - &ksocknal_data.ksnd_zc_min_frag, sizeof (int), + &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), 0644, NULL, &proc_dointvec}, #endif {SOCKNAL_SYSCTL_TYPED, "typed", - &ksocknal_data.ksnd_typed_conns, sizeof (int), + &ksocknal_tunables.ksnd_typed_conns, sizeof (int), 
0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", - &ksocknal_data.ksnd_min_bulk, sizeof (int), + &ksocknal_tunables.ksnd_min_bulk, sizeof (int), 0644, NULL, &proc_dointvec}, { 0 } }; @@ -73,6 +71,7 @@ static ctl_table ksocknal_top_ctl_table[] = { {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, { 0 } }; +#endif int ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, @@ -88,12 +87,6 @@ ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, return PTL_OK; } -int -ksocknal_api_shutdown(nal_t *nal, int ni) -{ - return PTL_OK; -} - void ksocknal_api_lock(nal_t *nal, unsigned long *flags) { @@ -154,19 +147,6 @@ ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds) return (milliseconds); } -nal_t * -ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); -} - -/* - * EXTRA functions follow - */ - int ksocknal_set_mynid(ptl_nid_t nid) { @@ -832,7 +812,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; list_add (&conn->ksnc_list, &peer->ksnp_conns); atomic_inc (&conn->ksnc_refcount); @@ -1466,30 +1446,34 @@ ksocknal_free_buffers (void) } void -ksocknal_module_fini (void) +ksocknal_api_shutdown (nal_t *nal) { int i; + if (nal->nal_refct != 0) { + /* This module got the first ref */ + PORTAL_MODULE_UNUSE; + return; + } + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); + LASSERT(nal == &ksocknal_api); + switch (ksocknal_data.ksnd_init) { default: LASSERT (0); case SOCKNAL_INIT_ALL: -#if CONFIG_SYSCTL - if (ksocknal_data.ksnd_sysctl != NULL) - unregister_sysctl_table 
(ksocknal_data.ksnd_sysctl); -#endif - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); + libcfs_nal_cmd_unregister(SOCKNAL); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; /* fall through */ - case SOCKNAL_INIT_PTL: + case SOCKNAL_INIT_LIB: /* No more calls to ksocknal_cmd() to create new * autoroutes/connections since we're being unloaded. */ - PtlNIFini(ksocknal_ni); /* Delete all autoroute entries */ ksocknal_del_route(PTL_NID_ANY, 0, 0, 0); @@ -1510,6 +1494,8 @@ ksocknal_module_fini (void) /* Tell lib we've stopped calling into her. */ lib_fini(&ksocknal_lib); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; /* fall through */ case SOCKNAL_INIT_DATA: @@ -1557,6 +1543,8 @@ ksocknal_module_fini (void) kpr_deregister (&ksocknal_data.ksnd_router); ksocknal_free_buffers(); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; /* fall through */ case SOCKNAL_INIT_NOTHING: @@ -1571,7 +1559,7 @@ ksocknal_module_fini (void) } -void __init +void ksocknal_init_incarnation (void) { struct timeval tv; @@ -1587,42 +1575,31 @@ ksocknal_init_incarnation (void) (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; } -int __init -ksocknal_module_init (void) +int +ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; + ptl_process_id_t process_id; + int pkmem = atomic_read(&portal_kmemory); + int rc; + int i; + int j; - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - /* check ksnr_connected/connecting field large enough */ - LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); + LASSERT (nal == 
&ksocknal_api); - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = ksocknal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } - ksocknal_lib.nal_data = &ksocknal_data; + LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; - ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; - ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; - ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; -#if SOCKNAL_ZC - ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; -#endif ksocknal_init_incarnation(); ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; @@ -1669,7 +1646,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1685,15 +1662,19 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); + /* NB we have to wait to be told our true NID... 
*/ + process_id.pid = 0; + process_id.nid = 0; + + rc = lib_init(&ksocknal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { + CERROR("lib_init failed: error %d\n", rc); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PtlNIDebug(ksocknal_ni, ~0); - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called for (i = 0; i < SOCKNAL_N_SCHED; i++) { rc = ksocknal_thread_start (ksocknal_scheduler, @@ -1701,7 +1682,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1710,7 +1691,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1718,7 +1699,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_reaper, NULL); if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } @@ -1728,7 +1709,7 @@ ksocknal_module_init (void) CDEBUG(D_NET, "Can't initialise routing interface " "(rc = %d): not routing\n", rc); } else { - /* Only allocate forwarding buffers if I'm on a gateway */ + /* Only allocate forwarding buffers if there's a router */ for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { @@ -1744,7 +1725,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, fmb_kiov[pool->fmp_buff_pages])); if (fmb == NULL) { - ksocknal_module_fini(); + ksocknal_api_shutdown(&ksocknal_api); return (-ENOMEM); } @@ -1754,7 +1735,7 @@ ksocknal_module_init (void) fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL); if (fmb->fmb_kiov[j].kiov_page == NULL) { - ksocknal_module_fini (); + 
ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1765,19 +1746,13 @@ ksocknal_module_init (void) } } - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); + rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PORTAL_SYMBOL_REGISTER(ksocknal_ni); - -#ifdef CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_data.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0); -#endif /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; @@ -1789,6 +1764,75 @@ ksocknal_module_init (void) return (0); } +void __exit +ksocknal_module_fini (void) +{ +#ifdef CONFIG_SYSCTL + if (ksocknal_tunables.ksnd_sysctl != NULL) + unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); +#endif + PtlNIFini(ksocknal_ni); + + ptl_unregister_nal(SOCKNAL); +} + +int __init +ksocknal_module_init (void) +{ + int rc; + + /* packet descriptor must fit in a router descriptor's scratchpad */ + LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); + /* the following must be sizeof(int) for proc_dointvec() */ + LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); +#if SOCKNAL_ZC + LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); +#endif + /* check ksnr_connected/connecting field large enough */ + LASSERT(SOCKNAL_CONN_NTYPES <= 4); + + ksocknal_api.startup = ksocknal_api_startup; + ksocknal_api.forward = ksocknal_api_forward; + ksocknal_api.shutdown = ksocknal_api_shutdown; + ksocknal_api.lock = ksocknal_api_lock; + ksocknal_api.unlock = ksocknal_api_unlock; + ksocknal_api.nal_data = &ksocknal_data; 
+ + ksocknal_lib.nal_data = &ksocknal_data; + + /* Initialise dynamic tunables to defaults once only */ + ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; + ksocknal_tunables.ksnd_eager_ack = SOCKNAL_EAGER_ACK; + ksocknal_tunables.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; + ksocknal_tunables.ksnd_min_bulk = SOCKNAL_MIN_BULK; +#if SOCKNAL_ZC + ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; +#endif + + rc = ptl_register_nal(SOCKNAL, &ksocknal_api); + if (rc != PTL_OK) { + CERROR("Can't register SOCKNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways want the NAL started up at module load time... */ + rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(SOCKNAL); + return (-ENODEV); + } + +#ifdef CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + ksocknal_tunables.ksnd_sysctl = + register_sysctl_table (ksocknal_top_ctl_table, 0); +#endif + return (0); +} + MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); MODULE_LICENSE("GPL"); @@ -1796,4 +1840,3 @@ MODULE_LICENSE("GPL"); module_init(ksocknal_module_init); module_exit(ksocknal_module_fini); -EXPORT_SYMBOL (ksocknal_ni); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index bd3c1fba1e525698d00d425b4cdd02e39899442d..e1e3aaca7cacc5919d27ac694a95f68097ec693d 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -64,6 +64,7 @@ #include <linux/kpr.h> #include <portals/p30.h> #include <portals/lib-p30.h> +#include <portals/nal.h> #include <portals/socknal.h> #if CONFIG_SMP @@ -141,7 +142,6 @@ typedef struct { } ksock_irqinfo_t; typedef struct { - int ksnd_init; /* initialisation state */ int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */ int ksnd_eager_ack; /* make TCP ack eagerly? */ int ksnd_typed_conns; /* drive sockets by type? 
*/ @@ -150,6 +150,10 @@ typedef struct { unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif struct ctl_table_header *ksnd_sysctl; /* sysctl interface */ +} ksock_tunables_t; + +typedef struct { + int ksnd_init; /* initialisation state */ __u64 ksnd_incarnation; /* my epoch */ rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ @@ -194,7 +198,7 @@ typedef struct { #define SOCKNAL_INIT_NOTHING 0 #define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_PTL 2 +#define SOCKNAL_INIT_LIB 2 #define SOCKNAL_INIT_ALL 3 /* A packet just assembled for transmission is represented by 1 or more @@ -362,6 +366,7 @@ typedef struct ksock_peer extern nal_cb_t ksocknal_lib; extern ksock_nal_data_t ksocknal_data; +extern ksock_tunables_t ksocknal_tunables; static inline struct list_head * ksocknal_nid2peerlist (ptl_nid_t nid) diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index ebb32da2bdecb9304f854f793387af1e7bcd6aff..861c07dcbee4692d72d43f25420a1592aa659ed2 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -262,7 +262,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) LASSERT (tx->tx_nkiov > 0); #if SOCKNAL_ZC - if (fragsize >= ksocknal_data.ksnd_zc_min_frag && + if (fragsize >= ksocknal_tunables.ksnd_zc_min_frag && (sock->sk->route_caps & NETIF_F_SG) && (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { @@ -381,7 +381,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) * is set. Instead, we presume peer death has occurred if * the socket doesn't drain within a timout */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; conn->ksnc_peer->ksnp_last_alive = jiffies; } while (tx->tx_resid != 0); @@ -444,7 +444,7 @@ ksocknal_recv_iov (ksock_conn_t *conn) /* received something... 
*/ conn->ksnc_peer->ksnp_last_alive = jiffies; conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -503,7 +503,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) /* received something... */ conn->ksnc_peer->ksnp_last_alive = jiffies; conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -562,7 +562,7 @@ ksocknal_receive (ksock_conn_t *conn) if (conn->ksnc_rx_nob_wanted == 0) { /* Completed a message segment (header or payload) */ - if ((ksocknal_data.ksnd_eager_ack & conn->ksnc_type) != 0 && + if ((ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0 && (conn->ksnc_rx_state == SOCKNAL_RX_BODY || conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) { /* Remind the socket to ack eagerly... */ @@ -723,7 +723,7 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route) LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES); LASSERT (!route->ksnr_connecting); - if (ksocknal_data.ksnd_typed_conns) + if (ksocknal_tunables.ksnd_typed_conns) route->ksnr_connecting = KSNR_TYPED_ROUTES & ~route->ksnr_connected; else @@ -797,7 +797,7 @@ ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) fnob = nob; } - if (!ksocknal_data.ksnd_typed_conns) + if (!ksocknal_tunables.ksnd_typed_conns) continue; switch (c->ksnc_type) { @@ -808,11 +808,11 @@ ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) case SOCKNAL_CONN_BULK_IN: continue; case SOCKNAL_CONN_BULK_OUT: - if (tx->tx_nob < ksocknal_data.ksnd_min_bulk) + if (tx->tx_nob < ksocknal_tunables.ksnd_min_bulk) continue; break; case SOCKNAL_CONN_CONTROL: - if (tx->tx_nob >= ksocknal_data.ksnd_min_bulk) + if (tx->tx_nob >= ksocknal_tunables.ksnd_min_bulk) continue; break; } @@ -856,7 +856,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, 
ksock_conn_t *conn) spin_lock_irqsave (&sched->kss_lock, flags); conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with list_add_tail */ list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); @@ -2182,7 +2182,7 @@ ksocknal_setup_sock (struct socket *sock) /* Keepalives: If 3/4 of the timeout elapses, start probing every * second until the timeout elapses. */ - option = (ksocknal_data.ksnd_io_timeout * 3) / 4; + option = (ksocknal_tunables.ksnd_io_timeout * 3) / 4; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, (char *)&option, sizeof (option)); @@ -2202,7 +2202,7 @@ ksocknal_setup_sock (struct socket *sock) return (rc); } - option = ksocknal_data.ksnd_io_timeout / 4; + option = ksocknal_tunables.ksnd_io_timeout / 4; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, (char *)&option, sizeof (option)); @@ -2259,7 +2259,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) /* Set the socket timeouts, so our connection attempt completes in * finite time */ - tv.tv_sec = ksocknal_data.ksnd_io_timeout; + tv.tv_sec = ksocknal_tunables.ksnd_io_timeout; tv.tv_usec = 0; set_fs (KERNEL_DS); @@ -2268,7 +2268,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) set_fs (oldmm); if (rc != 0) { CERROR ("Can't set send timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); + ksocknal_tunables.ksnd_io_timeout, rc); goto out; } @@ -2278,7 +2278,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) set_fs (oldmm); if (rc != 0) { CERROR ("Can't set receive timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); + ksocknal_tunables.ksnd_io_timeout, rc); goto out; } @@ -2652,9 +2652,9 @@ ksocknal_reaper (void *arg) * timeout on any connection within (n+1)/n times the * timeout interval. 
*/ - if (ksocknal_data.ksnd_io_timeout > n * p) + if (ksocknal_tunables.ksnd_io_timeout > n * p) chunk = (chunk * n * p) / - ksocknal_data.ksnd_io_timeout; + ksocknal_tunables.ksnd_io_timeout; if (chunk == 0) chunk = 1; diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 914b78f9b385441f453ae3a12b6938c25a5caade..4e43aa57292027ffe15fe352a3d9ba2683ce3e04 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -949,19 +949,20 @@ void portals_run_lbug_upcall(char *file, const char *fn, const int line) char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { switch(nal){ -/* XXX this should be a nal method of some sort */ +/* XXX this could be a nal method of some sort, 'cept it's config + * dependent whether (say) socknal NIDs are actually IP addresses... */ #ifndef CRAY_PORTALS case TCPNAL: /* userspace NAL */ case SOCKNAL: - sprintf(str, "%u:%d.%d.%d.%d", (__u32)(nid >> 32), - HIPQUAD(nid)); + snprintf(str, PTL_NALFMT_SIZE-1, + "%u:%d.%d.%d.%d", (__u32)(nid >> 32), HIPQUAD(nid)); break; case QSWNAL: case GMNAL: case IBNAL: case SCIMACNAL: - sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid); + snprintf(str, PTL_NALFMT_SIZE-1, LPD64, nid); break; #endif default: diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index 9daa8e0b1fdca7848f08125f2ecc1db4264d813b..a53ea6b41e8bbec1b84ee8090f9e0cb7a91975e7 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -51,7 +51,13 @@ #define PORTAL_MINOR 240 -extern void (kping_client)(struct portal_ioctl_data *); +struct nal_cmd_handler { + nal_cmd_handler_fn *nch_handler; + void *nch_private; +}; + +static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; +static DECLARE_MUTEX(nal_cmd_sem); #ifdef PORTAL_DEBUG void kportal_assertion_failed(char *expr, char *file, const char *func, @@ -239,6 +245,62 @@ static inline void freedata(void *data, int len) PORTAL_FREE(data, len); } +int +libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) +{ + int rc = 0; + + CDEBUG(D_IOCTL, 
"Register NAL %d, handler: %p\n", nal, handler); + + if (nal > 0 && nal <= NAL_MAX_NR) { + down(&nal_cmd_sem); + if (nal_cmd[nal].nch_handler != NULL) + rc = -EBUSY; + else { + nal_cmd[nal].nch_handler = handler; + nal_cmd[nal].nch_private = private; + } + up(&nal_cmd_sem); + } + return rc; +} +EXPORT_SYMBOL(libcfs_nal_cmd_register); + +void +libcfs_nal_cmd_unregister(int nal) +{ + CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); + + LASSERT(nal > 0 && nal <= NAL_MAX_NR); + LASSERT(nal_cmd[nal].nch_handler != NULL); + + down(&nal_cmd_sem); + nal_cmd[nal].nch_handler = NULL; + nal_cmd[nal].nch_private = NULL; + up(&nal_cmd_sem); +} +EXPORT_SYMBOL(libcfs_nal_cmd_unregister); + +int +libcfs_nal_cmd(struct portals_cfg *pcfg) +{ + __u32 nal = pcfg->pcfg_nal; + int rc = -EINVAL; + ENTRY; + + down(&nal_cmd_sem); + if (nal > 0 && nal <= NAL_MAX_NR && + nal_cmd[nal].nch_handler != NULL) { + CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, + pcfg->pcfg_command); + rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); + } + up(&nal_cmd_sem); + + RETURN(rc); +} +EXPORT_SYMBOL(libcfs_nal_cmd); + static DECLARE_RWSEM(ioctl_list_sem); static LIST_HEAD(ioctl_list); @@ -356,6 +418,27 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, err = -EFAULT; break; #endif + case IOC_PORTAL_NAL_CMD: { + struct portals_cfg pcfg; + + LASSERT (data->ioc_plen1 == sizeof(pcfg)); + if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1, + sizeof(pcfg))) { + err = -EFAULT; + break; + } + + CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, + pcfg.pcfg_command); + err = libcfs_nal_cmd(&pcfg); + + if (err == 0 && + copy_to_user((char *)data->ioc_pbuf1, &pcfg, + sizeof (pcfg))) + err = -EFAULT; + break; + } + case IOC_PORTAL_MEMHOG: if (!capable (CAP_SYS_ADMIN)) err = -EPERM; diff --git a/lnet/lnet/api-eq.c b/lnet/lnet/api-eq.c index 390156a3be065ca80d80ca2bb53e3e7b23917dbc..7fc95fabf62e689165791ee6a85fdd42c54a9097 100644 --- a/lnet/lnet/api-eq.c +++ 
b/lnet/lnet/api-eq.c @@ -25,28 +25,6 @@ #include <portals/api-support.h> -int ptl_eq_init(void) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_fini(void) -{ - /* Nothing to do anymore... */ -} - -int ptl_eq_ni_init(nal_t * nal) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_ni_fini(nal_t * nal) -{ - /* Nothing to do anymore... */ -} - int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev) { int new_index = eq->sequence & (eq->size - 1); diff --git a/lnet/lnet/api-errno.c b/lnet/lnet/api-errno.c index 0e155daa965e63d29e33cf7820ed7ac37004ba4a..1c01c88f9c08c90785de3926cd3bc363ab1a641c 100644 --- a/lnet/lnet/api-errno.c +++ b/lnet/lnet/api-errno.c @@ -36,8 +36,7 @@ const char *ptl_err_str[] = { "PTL_MD_NO_UPDATE", "PTL_FAIL", - "PTL_IOV_TOO_MANY", - "PTL_IOV_TOO_SMALL", + "PTL_IOV_INVALID", "PTL_EQ_IN_USE", diff --git a/lnet/lnet/api-init.c b/lnet/lnet/api-init.c index e41bad8668b37ec99108fbf3b2fab442d5c7a27a..08d615d8a23c0155f50a436de25bdd97311951bb 100644 --- a/lnet/lnet/api-init.c +++ b/lnet/lnet/api-init.c @@ -25,41 +25,20 @@ #include <portals/api-support.h> -int ptl_init; - -int __p30_initialized; -int __p30_myr_initialized; -int __p30_ip_initialized; -ptl_handle_ni_t __myr_ni_handle; -ptl_handle_ni_t __ip_ni_handle; - int PtlInit(int *max_interfaces) { if (max_interfaces != NULL) - *max_interfaces = NAL_ENUM_END_MARKER; - - if (ptl_init) - return PTL_OK; + *max_interfaces = NAL_MAX_NR; LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO")); - ptl_ni_init(); - ptl_me_init(); - ptl_eq_init(); - ptl_init = 1; - - return PTL_OK; + return ptl_ni_init(); } void PtlFini(void) { - - /* Reverse order of initialization */ - ptl_eq_fini(); - ptl_me_fini(); ptl_ni_fini(); - ptl_init = 0; } diff --git a/lnet/lnet/api-me.c b/lnet/lnet/api-me.c index e724e5859307e2627908349b5c8f59903e5a1513..219aa5e08191ffb70e53894e8331b01a0138b1ff 100644 --- a/lnet/lnet/api-me.c +++ b/lnet/lnet/api-me.c @@ -25,18 +25,3 @@ #include 
<portals/api-support.h> -int ptl_me_init(void) -{ - return PTL_OK; -} -void ptl_me_fini(void) -{ /* Nothing to do */ -} -int ptl_me_ni_init(nal_t * nal) -{ - return PTL_OK; -} - -void ptl_me_ni_fini(nal_t * nal) -{ /* Nothing to do... */ -} diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 02082c678b93a605fd9c85662bf1e9a3c323f231..81afd0a5ee70af6b653804dda6bdb70837204e60 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -25,13 +25,36 @@ #include <portals/api-support.h> +int ptl_init; + /* Put some magic in the NI handle so uninitialised/zeroed handles are easy * to spot */ #define NI_HANDLE_MAGIC 0xebc0de00 #define NI_HANDLE_MASK 0x000000ff -#define MAX_NIS 8 -static nal_t *ptl_interfaces[MAX_NIS]; -int ptl_num_interfaces = 0; + +static struct nal_t *ptl_nal_table[NAL_MAX_NR]; + +#ifdef __KERNEL__ +DECLARE_MUTEX(ptl_mutex); + +static void ptl_mutex_enter (void) +{ + down (&ptl_mutex); +} + +static void ptl_mutex_exit (void) +{ + up (&ptl_mutex); +} +#else +static void ptl_mutex_enter (void) +{ +} + +static void ptl_mutex_exit (void) +{ +} +#endif nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) { @@ -46,147 +69,188 @@ nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) return NULL; idx &= NI_HANDLE_MASK; - if (idx < MAX_NIS) - return ptl_interfaces[idx]; + + if (idx >= NAL_MAX_NR || + ptl_nal_table[idx] == NULL || + ptl_nal_table[idx]->nal_refct == 0) + return NULL; - return NULL; + return ptl_nal_table[idx]; } -int ptl_ni_init(void) +int ptl_register_nal (ptl_interface_t interface, nal_t *nal) { - int i; - - LASSERT (MAX_NIS <= (NI_HANDLE_MASK + 1)); + int rc; - for (i = 0; i < MAX_NIS; i++) - ptl_interfaces[i] = NULL; + ptl_mutex_enter(); + + if (interface < 0 || interface >= NAL_MAX_NR) + rc = PTL_IFACE_INVALID; + else if (ptl_nal_table[interface] != NULL) + rc = PTL_IFACE_DUP; + else { + rc = PTL_OK; + ptl_nal_table[interface] = nal; + LASSERT(nal->nal_refct == 0); + } - return PTL_OK; + ptl_mutex_exit(); + return (rc); } -void ptl_ni_fini(void) +void 
ptl_unregister_nal (ptl_interface_t interface) { - int i; - - for (i = 0; i < MAX_NIS; i++) { - nal_t *nal = ptl_interfaces[i]; - if (!nal) - continue; + LASSERT(interface >= 0 && interface < NAL_MAX_NR); + LASSERT(ptl_nal_table[interface] != NULL); + LASSERT(ptl_nal_table[interface]->nal_refct == 0); + + ptl_mutex_enter(); + + ptl_nal_table[interface] = NULL; - if (nal->shutdown) - nal->shutdown(nal, i); - } + ptl_mutex_exit(); } -#ifdef __KERNEL__ -DECLARE_MUTEX(ptl_ni_init_mutex); - -static void ptl_ni_init_mutex_enter (void) +int ptl_ni_init(void) { - down (&ptl_ni_init_mutex); -} + /* If this assertion fails, we need more bits in NI_HANDLE_MASK and + * to shift NI_HANDLE_MAGIC left appropriately */ + LASSERT (NAL_MAX_NR <= (NI_HANDLE_MASK + 1)); + + ptl_mutex_enter(); + + if (!ptl_init) { + /* NULL pointers, clear flags */ + memset(ptl_nal_table, 0, sizeof(ptl_nal_table)); +#ifndef __KERNEL__ + /* Kernel NALs register themselves when their module loads, + * and unregister themselves when their module is unloaded. + * Userspace NALs, are plugged in explicitly here... */ + { + extern nal_t procapi_nal; + + /* XXX pretend it's socknal to keep liblustre happy... 
*/ + ptl_nal_table[SOCKNAL] = &procapi_nal; + LASSERT (procapi_nal.nal_refct == 0); + } +#endif + ptl_init = 1; + } -static void ptl_ni_init_mutex_exit (void) -{ - up (&ptl_ni_init_mutex); + ptl_mutex_exit(); + + return PTL_OK; } -#else -static void ptl_ni_init_mutex_enter (void) +void ptl_ni_fini(void) { -} + nal_t *nal; + int i; + + ptl_mutex_enter(); + + if (ptl_init) { + for (i = 0; i < NAL_MAX_NR; i++) { + + nal = ptl_nal_table[i]; + if (nal == NULL) + continue; + + if (nal->nal_refct != 0) { + CWARN("NAL %d has outstanding refcount %d\n", + i, nal->nal_refct); + nal->shutdown(nal); + } + + ptl_nal_table[i] = NULL; + } -static void ptl_ni_init_mutex_exit (void) -{ + ptl_init = 0; + } + + ptl_mutex_exit(); } -#endif - -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, ptl_pid_t requested_pid, - ptl_handle_ni_t * handle) +int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, + ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *handle) { nal_t *nal; - int i; + int i; + int rc; if (!ptl_init) return PTL_NO_INIT; - ptl_ni_init_mutex_enter (); + ptl_mutex_enter (); - nal = interface(ptl_num_interfaces, ptl_size, acl_size, requested_pid); - - if (!nal) { - ptl_ni_init_mutex_exit (); - return PTL_NAL_FAILED; + if (interface == PTL_IFACE_DEFAULT) { + for (i = 0; i < NAL_MAX_NR; i++) + if (ptl_nal_table[i] != NULL) { + interface = i; + break; + } + /* NB if no interfaces are registered, 'interface' will + * fail the valid test below */ } - - for (i = 0; i < ptl_num_interfaces; i++) { - if (ptl_interfaces[i] == nal) { - nal->refct++; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | i; - CDEBUG(D_OTHER, "Returning existing NAL (%d)\n", i); - ptl_ni_init_mutex_exit (); - return PTL_OK; - } + + if (interface < 0 || + interface >= NAL_MAX_NR || + ptl_nal_table[interface] == NULL) { + GOTO(out, rc = PTL_IFACE_INVALID); } - nal->refct = 1; - if (ptl_num_interfaces >= MAX_NIS) { - 
if (nal->shutdown) - nal->shutdown (nal, ptl_num_interfaces); - ptl_ni_init_mutex_exit (); - return PTL_NO_SPACE; - } + nal = ptl_nal_table[interface]; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces; - ptl_interfaces[ptl_num_interfaces++] = nal; + CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct); + rc = nal->startup(nal, requested_pid, desired_limits, actual_limits); - ptl_eq_ni_init(nal); - ptl_me_ni_init(nal); + if (rc != PTL_OK) { + CERROR("Error %d starting up NAL %d, refs %d\n", rc, + interface, nal->nal_refct); + GOTO(out, rc); + } + + if (nal->nal_refct != 0) { + /* Caller gets to know if this was the first ref or not */ + rc = PTL_IFACE_DUP; + } + + nal->nal_refct++; + handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface; - ptl_ni_init_mutex_exit (); - return PTL_OK; + out: + ptl_mutex_exit (); + return rc; } - int PtlNIFini(ptl_handle_ni_t ni) { nal_t *nal; - int idx; - int rc; + int idx; if (!ptl_init) return PTL_NO_INIT; - ptl_ni_init_mutex_enter (); + ptl_mutex_enter (); nal = ptl_hndl2nal (&ni); if (nal == NULL) { - ptl_ni_init_mutex_exit (); + ptl_mutex_exit (); return PTL_HANDLE_INVALID; } idx = ni.nal_idx & NI_HANDLE_MASK; - nal->refct--; - if (nal->refct > 0) { - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - - ptl_me_ni_fini(nal); - ptl_eq_ni_fini(nal); + LASSERT(nal->nal_refct > 0); - rc = PTL_OK; - if (nal->shutdown) - rc = nal->shutdown(nal, idx); + nal->nal_refct--; - ptl_interfaces[idx] = NULL; - ptl_num_interfaces--; + /* nal_refct == 0 tells nal->shutdown to really shut down */ + nal->shutdown(nal); - ptl_ni_init_mutex_exit (); - return rc; + ptl_mutex_exit (); + return PTL_OK; } int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out) diff --git a/lnet/lnet/api-wrap.c b/lnet/lnet/api-wrap.c index 9c82c30cd679ffa98d4b54ee69ca0b9735959604..3e6f9ce62f65c0928456fbaae9df3742e4fd75d1 100644 --- a/lnet/lnet/api-wrap.c +++ b/lnet/lnet/api-wrap.c @@ -124,25 
+124,6 @@ int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, return ret.rc; } - - -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in) -{ - PtlNIDebug_in args; - PtlNIDebug_out ret; - int rc; - - args.mask_in = mask_in; - - rc = do_forward(ni, PTL_NIDEBUG, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; -} - int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, @@ -255,45 +236,6 @@ int PtlMEDump(ptl_handle_me_t current_in) return ret.rc; } -static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in) -{ - nal_t *nal; - int rc; - int i; - - if (!ptl_init) { - CERROR("PtlMDAttach/Bind/Update: Not initialized\n"); - return PTL_NO_INIT; - } - - nal = ptl_hndl2nal(¤t_in); - if (!nal) - return PTL_HANDLE_INVALID; - - if (nal->validate != NULL) /* nal->validate not a NOOP */ - { - if ((md_in.options & PTL_MD_IOVEC) == 0) /* contiguous */ - { - rc = nal->validate (nal, md_in.start, md_in.length); - if (rc) - return (PTL_SEGV); - } - else - { - struct iovec *iov = (struct iovec *)md_in.start; - - for (i = 0; i < md_in.niov; i++, iov++) - { - rc = nal->validate (nal, iov->iov_base, iov->iov_len); - if (rc) - return (PTL_SEGV); - } - } - } - - return 0; -} - static ptl_handle_eq_t md2eq (ptl_md_t *md) { if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE)) @@ -310,16 +252,13 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, PtlMDAttach_out ret; int rc; - rc = validate_md(me_in, md_in); - if (rc == PTL_OK) { - args.eq_in = md2eq(&md_in); - args.me_in = me_in; - args.md_in = md_in; - args.unlink_in = unlink_in; + args.eq_in = md2eq(&md_in); + args.me_in = me_in; + args.md_in = md_in; + args.unlink_in = unlink_in; - rc = do_forward(me_in, PTL_MDATTACH, - &args, sizeof(args), &ret, sizeof(ret)); - } + rc = do_forward(me_in, PTL_MDATTACH, + &args, 
sizeof(args), &ret, sizeof(ret)); if (rc != PTL_OK) return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc; @@ -340,10 +279,6 @@ int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, PtlMDBind_out ret; int rc; - rc = validate_md(ni_in, md_in); - if (rc != PTL_OK) - return rc; - args.eq_in = md2eq(&md_in); args.ni_in = ni_in; args.md_in = md_in; @@ -378,9 +313,6 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, args.old_inout_valid = 0; if (new_inout) { - rc = validate_md (md_in, *new_inout); - if (rc != PTL_OK) - return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc; args.new_inout = *new_inout; args.new_inout_valid = 1; } else @@ -423,7 +355,7 @@ int PtlMDUnlink(ptl_handle_md_t md_in) } int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, - int (*callback) (ptl_event_t * event), + ptl_eq_handler_t callback, ptl_handle_eq_t * handle_out) { ptl_eq_t *eq = NULL; @@ -458,12 +390,6 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, for (i = 0; i < count; i++) ev[i].sequence = 0; - if (nal->validate != NULL) { - rc = nal->validate(nal, ev, count * sizeof(ptl_event_t)); - if (rc != PTL_OK) - goto fail; - } - args.ni_in = interface; args.count_in = count; args.base_in = ev; diff --git a/lnet/lnet/lib-dispatch.c b/lnet/lnet/lib-dispatch.c index 13036c7bc537c82a317dfc2c00571e8c63f25f4a..798e1173357d18643b4582df2234997209b3a20f 100644 --- a/lnet/lnet/lib-dispatch.c +++ b/lnet/lnet/lib-dispatch.c @@ -35,7 +35,6 @@ static dispatch_table_t dispatch_table[] = { [PTL_GETID] {do_PtlGetId, "PtlGetId"}, [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"}, [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"}, - [PTL_NIDEBUG] {do_PtlNIDebug, "PtlNIDebug"}, [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"}, [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"}, [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"}, diff --git a/lnet/lnet/lib-init.c b/lnet/lnet/lib-init.c index 61ef465d061ac0396b519094d51baf2f50fd4cfa..c62dbc2d5bd751730d795a294f97d988d49afea5 100644 --- 
a/lnet/lnet/lib-init.c +++ b/lnet/lnet/lib-init.c @@ -41,8 +41,15 @@ #ifndef PTL_USE_LIB_FREELIST int -kportal_descriptor_setup (nal_cb_t *nal) +kportal_descriptor_setup (nal_cb_t *nal, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { + /* Ignore requested limits! */ + actual_limits->max_mes = INT_MAX; + actual_limits->max_mds = INT_MAX; + actual_limits->max_eqs = INT_MAX; + return PTL_OK; } @@ -100,7 +107,9 @@ lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl) } int -kportal_descriptor_setup (nal_cb_t *nal) +kportal_descriptor_setup (nal_cb_t *nal, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { /* NB on failure caller must still call kportal_descriptor_cleanup */ /* ****** */ @@ -111,6 +120,13 @@ kportal_descriptor_setup (nal_cb_t *nal) memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds)); memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs)); + /* Ignore requested limits! */ + actual_limits->max_mes = MAX_MES; + actual_limits->max_mds = MAX_MDS; + actual_limits->max_eqs = MAX_EQS; + /* Hahahah what a load of bollocks. 
There's nowhere to + * specify the max # messages in-flight */ + rc = lib_freelist_init (nal, &nal->ni.ni_free_mes, MAX_MES, sizeof (lib_me_t)); if (rc != PTL_OK) @@ -248,21 +264,18 @@ lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh) } int -lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size) +lib_init(nal_cb_t *nal, ptl_process_id_t process_id, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { int rc = PTL_OK; lib_ni_t *ni = &nal->ni; + int ptl_size; int i; ENTRY; /* NB serialised in PtlNIInit() */ - if (ni->refcnt != 0) { /* already initialised */ - ni->refcnt++; - goto out; - } - lib_assert_wire_constants (); /* @@ -271,7 +284,8 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, */ memset(&ni->counters, 0, sizeof(lib_counters_t)); - rc = kportal_descriptor_setup (nal); + rc = kportal_descriptor_setup (nal, requested_limits, + &ni->actual_limits); if (rc != PTL_OK) goto out; @@ -287,12 +301,15 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, if (rc != PTL_OK) goto out; - ni->nid = nid; - ni->pid = pid; + ni->nid = process_id.nid; + ni->pid = process_id.pid; - ni->num_nodes = gsize; - ni->tbl.size = ptl_size; + if (requested_limits != NULL) + ptl_size = requested_limits->max_pt_index + 1; + else + ptl_size = 64; + ni->tbl.size = ptl_size; ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size); if (ni->tbl.tbl == NULL) { rc = PTL_NO_SPACE; @@ -302,9 +319,20 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, for (i = 0; i < ptl_size; i++) INIT_LIST_HEAD(&(ni->tbl.tbl[i])); - ni->debug = PTL_DEBUG_NONE; - ni->up = 1; - ni->refcnt++; + /* max_{mes,mds,eqs} set in kportal_descriptor_setup */ + + /* We don't have an access control table! 
*/ + ni->actual_limits.max_ac_index = -1; + + ni->actual_limits.max_pt_index = ptl_size - 1; + ni->actual_limits.max_md_iovecs = PTL_MD_MAX_IOV; + ni->actual_limits.max_me_list = INT_MAX; + + /* We don't support PtlGetPut! */ + ni->actual_limits.max_getput_md = 0; + + if (actual_limits != NULL) + *actual_limits = ni->actual_limits; out: if (rc != PTL_OK) { @@ -321,12 +349,7 @@ lib_fini(nal_cb_t * nal) lib_ni_t *ni = &nal->ni; int idx; - ni->refcnt--; - - if (ni->refcnt != 0) - goto out; - - /* NB no stat_lock() since this is the last reference. The NAL + /* NB no state_lock() since this is the last reference. The NAL * should have shut down already, so it should be safe to unlink * and free all descriptors, even those that appear committed to a * network op (eg MD with non-zero pending count) @@ -370,11 +393,9 @@ lib_fini(nal_cb_t * nal) } nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size); - ni->up = 0; lib_cleanup_handle_hash (nal); kportal_descriptor_cleanup (nal); - out: return (PTL_OK); } diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index 9a391cde5c94e43ec2599d62787977398ffb98ff..64a55b93556d6b2e4dc0649db5bc696b3940ba0f 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -86,6 +86,7 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, lib_eq_t *eq = NULL; int rc; int i; + int niov; /* NB we are passed an allocated, but uninitialised/active md. * if we return success, caller may lib_md_unlink() it. @@ -101,7 +102,7 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, /* Must check this _before_ allocation. Also, note that non-iov * MDs must set md_niov to 0. */ LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 || - md->niov <= PTL_MD_MAX_IOV); + md->length <= PTL_MD_MAX_IOV); /* This implementation doesn't know how to create START events or * disable END events. 
Best to LASSERT our caller is compliant so @@ -116,7 +117,6 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, new->me = NULL; new->start = md->start; - new->length = md->length; new->offset = 0; new->max_size = md->max_size; new->options = md->options; @@ -132,13 +132,13 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) + niov * sizeof (new->md_iov.iov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the base address on trust */ if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ return PTL_VAL_FAILED; @@ -146,11 +146,10 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.iov[i].iov_len; } - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - + new->length = total_length; + if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -166,13 +165,13 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, nal->cb_recv_pages == NULL) return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) + niov * sizeof (new->md_iov.kiov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the page pointer on trust */ if (new->md_iov.kiov[i].kiov_offset + new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) @@ -181,23 +180,23 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.kiov[i].kiov_len; } - if (md->length > 
total_length) - return PTL_IOV_TOO_SMALL; + new->length = total_length; if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, + rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov, &new->md_addrkey); if (rc != PTL_OK) return (rc); } #endif } else { /* contiguous */ - new->md_niov = 1; + new->length = md->length; + new->md_niov = niov = 1; new->md_iov.iov[0].iov_base = md->start; new->md_iov.iov[0].iov_len = md->length; if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -223,13 +222,13 @@ void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) * and that's all. */ new->start = md->start; - new->length = md->length; + new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? + md->length : md->md_niov; new->threshold = md->threshold; new->max_size = md->max_size; new->options = md->options; new->user_ptr = md->user_ptr; ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 
0 : md->md_niov; } int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) @@ -251,8 +250,8 @@ int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) unsigned long flags; if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) @@ -303,8 +302,8 @@ int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) unsigned long flags; if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) @@ -407,23 +406,16 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, goto out; } - /* XXX fttb, the new MD must be the same type wrt fragmentation */ - if (((new->options ^ md->options) & - (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) { - ret->rc = PTL_MD_INVALID; - goto out; - } - - if (new->niov > md->md_niov) { - ret->rc = PTL_IOV_TOO_MANY; + /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, + * since we simply overwrite the old lib-md */ + if ((((new->options ^ md->options) & + (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || + ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && + new->length != md->md_niov)) { + ret->rc = PTL_IOV_INVALID; goto out; } - if (new->niov < md->md_niov) { - ret->rc = PTL_IOV_TOO_SMALL; - goto out; - } - if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) { test_eq = ptl_handle2eq(&args->testq_in, nal); if (test_eq == NULL) { diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c index 
e3c46ea154b55cf0c075423e28f935867f1989cd..271fc820c83a0dc95f7da912c6b5c8c622cc65f7 100644 --- a/lnet/lnet/lib-me.c +++ b/lnet/lnet/lib-me.c @@ -146,13 +146,6 @@ int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) /* call with state_lock please */ void lib_me_unlink(nal_cb_t *nal, lib_me_t *me) { - lib_ni_t *ni = &nal->ni; - - if (ni->debug & PTL_DEBUG_UNLINK) { - ptl_handle_any_t handle; - ptl_me2handle(&handle, me); - } - list_del (&me->me_list); if (me->md) { diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 869c9d693f330da14abee12f30ddc9c172222748..1b69533f09e9161d715a130a1e26abe30ebfd44f 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -89,10 +89,6 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) int rc; ptl_hdr_t ack; - /* ni went down while processing this message */ - if (nal->ni.up == 0) - return; - if (msg == NULL) return; diff --git a/lnet/lnet/lib-ni.c b/lnet/lnet/lib-ni.c index 296bc4a2ac3b1d9846699f161a668ded9d9ffe35..aa959fcf88c607e362b3ce32959b8f60ae7fcbe2 100644 --- a/lnet/lnet/lib-ni.c +++ b/lnet/lnet/lib-ni.c @@ -29,18 +29,6 @@ #define MAX_DIST 18446744073709551615ULL -int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlNIDebug_in *args = v_args; - PtlNIDebug_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->rc = ni->debug; - ni->debug = args->mask_in; - - return 0; -} - int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret) { /* diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 012d3d91083ae236a21cbebbff88083c129be0ad..40e9da4369fe1c708723fe233a0fa61a3615c085 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -46,273 +46,17 @@ #include <portals/lib-p30.h> #include <portals/p30.h> +#include <portals/nal.h> #include <linux/kp30.h> #include <linux/kpr.h> #include <linux/portals_compat25.h> extern void (kping_client)(struct portal_ioctl_data *); -struct nal_cmd_handler { - nal_cmd_handler_t 
nch_handler; - void * nch_private; -}; - -static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; -static DECLARE_MUTEX(nal_cmd_sem); - - -static int -kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_del_route(int gw_nalid, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when) -{ - int rc; - kpr_control_interface_t *ci; - - /* No error if router not preset. Sysadmin is allowed to notify - * _everywhere_ when a NID boots or crashes, even if they know - * nothing of the peer. 
*/ - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (0); - - rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp, - ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep) -{ - int gateway_nalid; - ptl_nid_t gateway_nid; - ptl_nid_t lo_nid; - ptl_nid_t hi_nid; - int alive; - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, - &lo_nid, &hi_nid, &alive); - - if (rc == 0) { - CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n", - index, gateway_nalid, gateway_nid, lo_nid, hi_nid, - alive ? "up" : "down"); - - *gateway_nalidp = (__u32)gateway_nalid; - *gateway_nidp = gateway_nid; - *lo_nidp = lo_nid; - *hi_nidp = hi_nid; - *alivep = alive; - } - - PORTAL_SYMBOL_PUT (kpr_control_interface); - return (rc); -} - -static int -kportal_router_cmd(struct portals_cfg *pcfg, void * private) -{ - int err = -EINVAL; - ENTRY; - - switch(pcfg->pcfg_command) { - default: - CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); - break; - - case NAL_CMD_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_NOTIFY_ROUTER: { - CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", - 
pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags ? "Enabling" : "Disabling", - (time_t)pcfg->pcfg_nid3); - - err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags, - (time_t)pcfg->pcfg_nid3); - break; - } - - case NAL_CMD_GET_ROUTE: - CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); - err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, - &pcfg->pcfg_nid, - &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, - &pcfg->pcfg_flags); - break; - } - RETURN(err); -} - -int -kportal_nal_cmd(struct portals_cfg *pcfg) -{ - __u32 nal = pcfg->pcfg_nal; - int rc = -EINVAL; - - ENTRY; - - down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, - pcfg->pcfg_command); - rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); - } - up(&nal_cmd_sem); - RETURN(rc); -} - -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - return (PORTAL_SYMBOL_GET(kqswnal_ni)); - case SOCKNAL: - return (PORTAL_SYMBOL_GET(ksocknal_ni)); - case GMNAL: - return (PORTAL_SYMBOL_GET(kgmnal_ni)); - case IBNAL: - return (PORTAL_SYMBOL_GET(kibnal_ni)); - case TCPNAL: - /* userspace NAL */ - return (NULL); - case SCIMACNAL: - return (PORTAL_SYMBOL_GET(kscimacnal_ni)); - default: - /* A warning to a naive caller */ - CERROR ("unknown nal: %d\n", nal); - return (NULL); - } -} - -void -kportal_put_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - PORTAL_SYMBOL_PUT(kqswnal_ni); - break; - case SOCKNAL: - PORTAL_SYMBOL_PUT(ksocknal_ni); - break; - case GMNAL: - PORTAL_SYMBOL_PUT(kgmnal_ni); - break; - case IBNAL: - PORTAL_SYMBOL_PUT(kibnal_ni); - break; - case TCPNAL: - /* A lesson to a malicious caller */ - LBUG (); - case SCIMACNAL: - PORTAL_SYMBOL_PUT(kscimacnal_ni); - break; - default: - CERROR ("unknown nal: %d\n", nal); - } -} - -int -kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private) -{ - int rc = 0; - - 
CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - if (nal_cmd[nal].nch_handler != NULL) - rc = -EBUSY; - else { - nal_cmd[nal].nch_handler = handler; - nal_cmd[nal].nch_private = private; - } - up(&nal_cmd_sem); - } - return rc; -} - -int -kportal_nal_unregister(int nal) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - nal_cmd[nal].nch_handler = NULL; - nal_cmd[nal].nch_private = NULL; - up(&nal_cmd_sem); - } - return rc; -} - static int kportal_ioctl(struct portal_ioctl_data *data, unsigned int cmd, unsigned long arg) { - int err = 0; + int err; char str[PTL_NALFMT_SIZE]; ENTRY; @@ -334,68 +78,53 @@ static int kportal_ioctl(struct portal_ioctl_data *data, } case IOC_PORTAL_GET_NID: { - const ptl_handle_ni_t *nip; - ptl_process_id_t pid; + ptl_handle_ni_t nih; + ptl_process_id_t pid; CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) + err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + if (!(err == PTL_OK || err == PTL_IFACE_DUP)) RETURN (-EINVAL); - err = PtlGetId (*nip, &pid); + err = PtlGetId (nih, &pid); LASSERT (err == PTL_OK); - kportal_put_ni (data->ioc_nal); + + PtlNIFini(nih); data->ioc_nid = pid.nid; if (copy_to_user ((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; + RETURN (-EFAULT); + RETURN(0); } - case IOC_PORTAL_NAL_CMD: { - struct portals_cfg pcfg; - - LASSERT (data->ioc_plen1 == sizeof(pcfg)); - err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1, - sizeof(pcfg)); - if ( err ) { - EXIT; - return err; - } - - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, - pcfg.pcfg_command); - err = kportal_nal_cmd(&pcfg); - if (err == 0) { - if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, - sizeof (pcfg))) - err = -EFAULT; - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - } - 
break; - } case IOC_PORTAL_FAIL_NID: { - const ptl_handle_ni_t *nip; + ptl_handle_ni_t nih; CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) + err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); - err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count); - kportal_put_ni (data->ioc_nal); - break; + if (err == PTL_OK) { + /* There's no point in failing an interface that + * came into existance just for this */ + err = -EINVAL; + } else { + err = PtlFailNid (nih, data->ioc_nid, data->ioc_count); + if (err != PTL_OK) + err = -EINVAL; + } + + PtlNIFini(nih); + RETURN (err); } default: - err = -EINVAL; - break; + RETURN(-EINVAL); } - - RETURN(err); + /* Not Reached */ } DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl); @@ -411,30 +140,24 @@ static int init_kportals_module(void) RETURN(rc); } - rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL); - if (rc) { - PtlFini(); - CERROR("kportal_nal_registre: ROUTER error %d\n", rc); - } - - if (rc == 0) - libcfs_register_ioctl(&kportal_ioctl_handler); + rc = libcfs_register_ioctl(&kportal_ioctl_handler); + LASSERT (rc == 0); RETURN(rc); } static void exit_kportals_module(void) { - libcfs_deregister_ioctl(&kportal_ioctl_handler); - kportal_nal_unregister(ROUTER); + int rc; + + rc = libcfs_deregister_ioctl(&kportal_ioctl_handler); + LASSERT (rc == 0); + PtlFini(); } -EXPORT_SYMBOL(kportal_nal_register); -EXPORT_SYMBOL(kportal_nal_unregister); -EXPORT_SYMBOL(kportal_get_ni); -EXPORT_SYMBOL(kportal_put_ni); -EXPORT_SYMBOL(kportal_nal_cmd); +EXPORT_SYMBOL(ptl_register_nal); +EXPORT_SYMBOL(ptl_unregister_nal); EXPORT_SYMBOL(ptl_err_str); EXPORT_SYMBOL(lib_dispatch); @@ -446,7 +169,6 @@ EXPORT_SYMBOL(PtlMDAttach); EXPORT_SYMBOL(PtlMDUnlink); EXPORT_SYMBOL(PtlNIInit); EXPORT_SYMBOL(PtlNIFini); -EXPORT_SYMBOL(PtlNIDebug); 
EXPORT_SYMBOL(PtlInit); EXPORT_SYMBOL(PtlFini); EXPORT_SYMBOL(PtlSnprintHandle); diff --git a/lnet/router/router.c b/lnet/router/router.c index 27aab67a9769749f891078a104246ee377317874..9fb6afef4c1ff2aae0cc242e95c656b09cf79895 100644 --- a/lnet/router/router.c +++ b/lnet/router/router.c @@ -48,13 +48,6 @@ kpr_router_interface_t kpr_router_interface = { kprri_deregister: kpr_deregister_nal, }; -kpr_control_interface_t kpr_control_interface = { - kprci_add_route: kpr_add_route, - kprci_del_route: kpr_del_route, - kprci_get_route: kpr_get_route, - kprci_notify: kpr_sys_notify, -}; - int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) { @@ -637,7 +630,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, int kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid, - int alive, time_t when) + int alive, time_t when) { return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when)); } @@ -696,8 +689,8 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, } int -kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive) +kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid, + ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive) { struct list_head *e; @@ -725,11 +718,67 @@ kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, return (-ENOENT); } +static int +kpr_nal_cmd(struct portals_cfg *pcfg, void * private) +{ + int err = -EINVAL; + ENTRY; + + switch(pcfg->pcfg_command) { + default: + CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); + break; + + case NAL_CMD_ADD_ROUTE: + CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", + pcfg->pcfg_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + break; + + case NAL_CMD_DEL_ROUTE: + CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", + pcfg->pcfg_gw_nal, 
pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + break; + + case NAL_CMD_NOTIFY_ROUTER: { + CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", + pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_flags ? "Enabling" : "Disabling", + (time_t)pcfg->pcfg_nid3); + + err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3); + break; + } + + case NAL_CMD_GET_ROUTE: + CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); + err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, + &pcfg->pcfg_nid, + &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, + &pcfg->pcfg_flags); + break; + } + RETURN(err); +} + + static void /*__exit*/ kpr_finalise (void) { LASSERT (list_empty (&kpr_nals)); + libcfs_nal_cmd_unregister(ROUTER); + + PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); + + kpr_proc_fini(); + while (!list_empty (&kpr_routes)) { kpr_route_entry_t *re = list_entry(kpr_routes.next, kpr_route_entry_t, @@ -739,11 +788,6 @@ kpr_finalise (void) PORTAL_FREE(re, sizeof (*re)); } - kpr_proc_fini(); - - PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); - PORTAL_SYMBOL_UNREGISTER(kpr_control_interface); - CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n", atomic_read(&portal_kmemory)); } @@ -751,13 +795,21 @@ kpr_finalise (void) static int __init kpr_initialise (void) { + int rc; + CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", atomic_read(&portal_kmemory)); kpr_proc_init(); + rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL); + if (rc != 0) { + CERROR("Can't register nal cmd handler\n"); + kpr_proc_fini(); /* undo kpr_proc_init() before failing the module load */ + return (rc); + } + PORTAL_SYMBOL_REGISTER(kpr_router_interface); - PORTAL_SYMBOL_REGISTER(kpr_control_interface); return (0); } @@ -768,5 +820,4 @@ MODULE_LICENSE("GPL"); module_init (kpr_initialise); module_exit (kpr_finalise); -EXPORT_SYMBOL (kpr_control_interface); EXPORT_SYMBOL (kpr_router_interface); diff --git a/lnet/router/router.h
b/lnet/router/router.h index 309025b3f6f76c42c4a43c110253f7cadd31e35a..0787064c502a4515eb4d9db0180b9724bb7b7ffb 100644 --- a/lnet/router/router.h +++ b/lnet/router/router.h @@ -93,15 +93,6 @@ extern void kpr_deregister_nal (void *arg); extern void kpr_proc_init (void); extern void kpr_proc_fini (void); -extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); -extern int kpr_del_route (int gw_nal, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi); -extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive); -extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when); - extern unsigned long long kpr_fwd_bytes; extern unsigned long kpr_fwd_packets; extern unsigned long kpr_fwd_errors; diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c index 9977f206720cbb50e91f7c4fa06283a93d491424..b216df16c589451fee41e398bbb01f46dc0852c5 100644 --- a/lnet/tests/ping_cli.c +++ b/lnet/tests/ping_cli.c @@ -46,7 +46,7 @@ static struct pingcli_data *client = NULL; static int count = 0; static void -pingcli_shutdown(int err) +pingcli_shutdown(ptl_handle_ni_t nih, int err) { int rc; @@ -70,7 +70,7 @@ pingcli_shutdown(int err) if ((rc = PtlMEUnlink (client->me))) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (client->args->ioc_nal); + PtlNIFini(nih); case 4: /* Free our buffers */ @@ -84,7 +84,7 @@ pingcli_shutdown(int err) CDEBUG (D_OTHER, "ping client released resources\n"); } /* pingcli_shutdown() */ -static int pingcli_callback(ptl_event_t *ev) +static void pingcli_callback(ptl_event_t *ev) { int i, magic; i = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned)); @@ -92,21 +92,19 @@ static int pingcli_callback(ptl_event_t *ev) if(magic != 0xcafebabe) { printk ("LustreError: Unexpected response \n"); - return 1; } if((i == count) || !count) wake_up_process (client->tsk); else printk ("LustreError: Received response after 
timeout for %d\n",i); - return 1; } static struct pingcli_data * pingcli_start(struct portal_ioctl_data *args) { - ptl_handle_ni_t *nip; + ptl_handle_ni_t nih = PTL_INVALID_HANDLE; unsigned ping_head_magic = PING_HEADER_MAGIC; unsigned ping_bulk_magic = PING_BULK_MAGIC; int rc; @@ -127,7 +125,7 @@ pingcli_start(struct portal_ioctl_data *args) if (client->outbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } @@ -136,23 +134,24 @@ pingcli_start(struct portal_ioctl_data *args) if (client->inbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) + rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) /* PTL_IFACE_DUP: NI was already initialised, nih is still usable */ { CERROR ("NAL %d not loaded\n", args->ioc_nal); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Based on the initialization aquire our unique portal ID.
*/ - if ((rc = PtlGetId (*nip, &client->myid))) + if ((rc = PtlGetId (nih, &client->myid))) { CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -164,20 +163,20 @@ pingcli_start(struct portal_ioctl_data *args) client->id_remote.nid = args->ioc_nid; client->id_remote.pid = 0; - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, + if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, client->id_local, 0, ~0, PTL_RETAIN, PTL_INS_AFTER, &client->me))) { CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) + if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) { CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -196,7 +195,7 @@ pingcli_start(struct portal_ioctl_data *args) if ((rc = PtlMDAttach (client->me, client->md_in_head, PTL_UNLINK, &client->md_in_head_h))) { CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } /* Setup the outgoing ping header */ @@ -212,10 +211,10 @@ pingcli_start(struct portal_ioctl_data *args) count = 0; /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, + if ((rc=PtlMDBind (nih, client->md_out_head, PTL_UNLINK, &client->md_out_head_h))) { CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } while ((args->ioc_count - count)) { @@ -230,7 +229,7 @@ pingcli_start(struct portal_ioctl_data *args) if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } printk ("Lustre: sent msg no %d", count); @@ -255,7 +254,7 @@ pingcli_start(struct portal_ioctl_data *args) PORTAL_FREE 
(client->inbuf, (args->ioc_size + STDSIZE) * args->ioc_count); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); /* Success! */ return NULL; diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c index 0aa1ea7e08dbd017c4ead862625856444a960810..84da81489a8a32d153b3dfb4d49f95310922cda4 100644 --- a/lnet/tests/ping_srv.c +++ b/lnet/tests/ping_srv.c @@ -81,7 +81,7 @@ static void *pingsrv_shutdown(int err) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (nal); + PtlNIFini (server->ni); case 4: @@ -167,19 +167,18 @@ int pingsrv_thread(void *arg) return 0; } -static int pingsrv_packet(ptl_event_t *ev) +static void pingsrv_packet(ptl_event_t *ev) { atomic_inc (&pkt); wake_up_process (server->tsk); - return 1; } /* pingsrv_head() */ -static int pingsrv_callback(ptl_event_t *ev) +static void pingsrv_callback(ptl_event_t *ev) { if (ev == NULL) { CERROR ("null in callback, ev=%p\n", ev); - return 0; + return; } server->evnt = *ev; @@ -193,23 +192,24 @@ static int pingsrv_callback(ptl_event_t *ev) packets_valid++; - return pingsrv_packet(ev); + pingsrv_packet(ev); } /* pingsrv_callback() */ static struct pingsrv_data *pingsrv_setup(void) { - ptl_handle_ni_t *nip; int rc; + server->ni = PTL_INVALID_HANDLE; + /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { + rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) { CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); return pingsrv_shutdown (4); } - server->ni= *nip; /* Based on the initialization aquire our unique portal ID. 
*/ if ((rc = PtlGetId (server->ni, &server->my_id))) { @@ -229,7 +229,7 @@ static struct pingsrv_data *pingsrv_setup(void) } - if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback, + if ((rc = PtlEQAlloc (server->ni, 1024, &pingsrv_callback, &server->eq))) { PDEBUG ("PtlEQAlloc (callback)", rc); return pingsrv_shutdown (2); diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c index 663da4ef54e3a1cfd1c66be6912e0b4d900e7b62..8e8649195dd8da312f7cf5c3fa28d8cecce6e0da 100644 --- a/lnet/tests/sping_cli.c +++ b/lnet/tests/sping_cli.c @@ -51,7 +51,7 @@ static struct pingcli_data *client = NULL; static int count = 0; static void -pingcli_shutdown(int err) +pingcli_shutdown(ptl_handle_ni_t nih, int err) { int rc; @@ -72,7 +72,7 @@ pingcli_shutdown(int err) if ((rc = PtlMEUnlink (client->me))) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (client->args->ioc_nal); + PtlNIFini (nih); case 4: /* Free our buffers */ @@ -92,17 +92,16 @@ pingcli_shutdown(int err) CDEBUG (D_OTHER, "ping client released resources\n"); } /* pingcli_shutdown() */ -static int pingcli_callback(ptl_event_t *ev) +static void pingcli_callback(ptl_event_t *ev) { - wake_up_process (client->tsk); - return 1; + wake_up_process (client->tsk); } static struct pingcli_data * pingcli_start(struct portal_ioctl_data *args) { - const ptl_handle_ni_t *nip; + ptl_handle_ni_t nih = PTL_INVALID_HANDLE; unsigned ping_head_magic = PING_HEADER_MAGIC; char str[PTL_NALFMT_SIZE]; int rc; @@ -122,7 +121,7 @@ pingcli_start(struct portal_ioctl_data *args) if (client->outbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } @@ -131,23 +130,24 @@ pingcli_start(struct portal_ioctl_data *args) if (client->inbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Aquire and initialize the proper nal for portals. 
*/ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) + rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR ("NAL %d not loaded.\n", args->ioc_nal); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) + if ((rc = PtlGetId (nih, &client->myid))) { CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -159,20 +159,20 @@ pingcli_start(struct portal_ioctl_data *args) client->id_remote.nid = args->ioc_nid; client->id_remote.pid = 0; - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, + if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, client->id_local, 0, ~0, PTL_RETAIN, PTL_INS_AFTER, &client->me))) { CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) + if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) { CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -189,7 +189,7 @@ pingcli_start(struct portal_ioctl_data *args) if ((rc = PtlMDAttach (client->me, client->md_in_head, PTL_UNLINK, &client->md_in_head_h))) { CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } @@ -204,17 +204,17 @@ pingcli_start(struct portal_ioctl_data *args) memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, + if ((rc=PtlMDBind (nih, client->md_out_head, PTL_UNLINK, &client->md_out_head_h))) { CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } /* Put the ping packet */ if((rc = PtlPut (client->md_out_head_h, 
PTL_NOACK_REQ, client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } @@ -223,13 +223,13 @@ pingcli_start(struct portal_ioctl_data *args) rc = schedule_timeout (20 * args->ioc_timeout); if (rc == 0) { printk ("LustreError: Time out on the server\n"); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return NULL; } else printk("Lustre: Received respose from the server \n"); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); /* Success! */ return NULL; diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c index e8fb47051c19c8ee68c7bd9e7d143d85f2efd714..9e731261cc322c1c536b87bf9c83fa78f80bf57c 100644 --- a/lnet/tests/sping_srv.c +++ b/lnet/tests/sping_srv.c @@ -53,7 +53,7 @@ #define STDSIZE (sizeof(int) + sizeof(int) + 4) -static int nal = 0; // Your NAL, +static int nal = PTL_IFACE_DEFAULT; // Your NAL, static unsigned long packets_valid = 0; // Valid packets static int running = 1; atomic_t pkt; @@ -86,7 +86,7 @@ static void *pingsrv_shutdown(int err) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (nal); + PtlNIFini(server->ni); case 4: @@ -159,19 +159,18 @@ int pingsrv_thread(void *arg) return 0; } -static int pingsrv_packet(ptl_event_t *ev) +static void pingsrv_packet(ptl_event_t *ev) { atomic_inc (&pkt); wake_up_process (server->tsk); - return 1; } /* pingsrv_head() */ -static int pingsrv_callback(ptl_event_t *ev) +static void pingsrv_callback(ptl_event_t *ev) { if (ev == NULL) { CERROR ("null in callback, ev=%p\n", ev); - return 0; + return; } server->evnt = *ev; @@ -182,24 +181,24 @@ static int pingsrv_callback(ptl_event_t *ev) packets_valid++; - return pingsrv_packet(ev); + pingsrv_packet(ev); } /* pingsrv_callback() */ static struct pingsrv_data *pingsrv_setup(void) { - ptl_handle_ni_t *nip; int rc; /* Aquire and initialize the proper nal for portals. 
*/ - if ((nip = kportal_get_ni (nal)) == NULL) { + server->ni = PTL_INVALID_HANDLE; + + rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); return pingsrv_shutdown (4); } - server->ni= *nip; - /* Based on the initialization aquire our unique portal ID. */ if ((rc = PtlGetId (server->ni, &server->my_id))) { PDEBUG ("PtlGetId", rc); diff --git a/lnet/ulnds/bridge.h b/lnet/ulnds/bridge.h index 9a90ab8d8dd705c2d7523d007a824c4c12d857ec..90ce3244fed2d900e63ed740e9d82f434db9c368 100644 --- a/lnet/ulnds/bridge.h +++ b/lnet/ulnds/bridge.h @@ -10,6 +10,12 @@ #define TCPNAL_PROCBRIDGE_H #include <portals/lib-p30.h> +#include <portals/nal.h> + +#define PTL_IFACE_TCP 1 +#define PTL_IFACE_ER 2 +#define PTL_IFACE_SS 3 +#define PTL_IFACE_MAX 4 typedef struct bridge { int alive; @@ -22,12 +28,6 @@ typedef struct bridge { } *bridge; -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - typedef int (*nal_initialize)(bridge); extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lnet/ulnds/procapi.c b/lnet/ulnds/procapi.c index 00a7ae4deb60cf4e1cf6d6f6b7a2cae5a33cc60f..e40c4b9c9b1413a7f0c7cbf994317376aa0e12a9 100644 --- a/lnet/ulnds/procapi.c +++ b/lnet/ulnds/procapi.c @@ -95,7 +95,7 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, * cleanup nal state, reclaim the lower side thread and * its state using PTL_FINI codepoint */ -static int procbridge_shutdown(nal_t *n, int ni) +static void procbridge_shutdown(nal_t *n) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; @@ -114,16 +114,6 @@ static int procbridge_shutdown(nal_t *n, int ni) } while (1); free(p); - return(0); -} - - -/* Function: validate - * useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, size_t extent) -{ - return(0); } @@ -187,18 +177,20 @@ static int procbridge_yield(nal_t 
*n, unsigned long *flags, int milliseconds) return (milliseconds); } +/* forward decl */ +extern int procbridge_startup (nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); /* api_nal * the interface vector to allow the generic code to access * this nal. this is seperate from the library side nal_cb. * TODO: should be dyanmically allocated */ -static nal_t api_nal = { - ni: {0}, +nal_t procapi_nal = { nal_data: NULL, - forward: procbridge_forward, + startup: procbridge_startup, shutdown: procbridge_shutdown, - validate: procbridge_validate, + forward: procbridge_forward, yield: procbridge_yield, lock: procbridge_lock, unlock: procbridge_unlock @@ -206,7 +198,7 @@ static nal_t api_nal = { ptl_nid_t tcpnal_mynid; -/* Function: procbridge_interface +/* Function: procbridge_startup * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -214,40 +206,34 @@ ptl_nid_t tcpnal_mynid; * and effectively ignored * actual: limits actually allocated and returned * - * Returns: a pointer to my statically allocated top side NAL - * structure + * Returns: portals rc * * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. 
*/ -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) +int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { nal_init_args_t args; + procbridge p; bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; + /* XXX nal_type is purely private to tcpnal here */ int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - if(initialized) return (&api_nal); + LASSERT(nal == &procapi_nal); init_unix_timer(); b=(bridge)malloc(sizeof(struct bridge)); p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; + nal->nal_data=b; b->local=p; - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - args.nia_requested_pid = requested_pid; - args.nia_limits = &limits; + args.nia_requested_limits = requested_limits; + args.nia_actual_limits = actual_limits; args.nia_nal_type = nal_type; args.nia_bridge = b; @@ -259,19 +245,19 @@ nal_t *procbridge_interface(int num_interface, /* initialize notifier */ if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { perror("socketpair failed"); - return NULL; + return PTL_FAIL; } if (!register_io_handler(p->notifier[1], READ_HANDLER, procbridge_notifier_handler, p)) { perror("fail to register notifier handler"); - return NULL; + return PTL_FAIL; } /* create nal thread */ if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); - return(NULL); + return PTL_FAIL; } do { @@ -285,10 +271,9 @@ nal_t *procbridge_interface(int num_interface, } while (1); if (p->nal_flags & NAL_FLAG_STOPPED) - return (NULL); + return PTL_FAIL; b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - return (&api_nal); + return PTL_OK; } diff --git a/lnet/ulnds/procbridge.h b/lnet/ulnds/procbridge.h index 965f83d6a0d2b3017ecbd7534bb7f1a5d4ee35bd..1c8e7dd87e10e9dccec6dc171186ac3f6b9b2a23 
100644 --- a/lnet/ulnds/procbridge.h +++ b/lnet/ulnds/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lnet/ulnds/proclib.c b/lnet/ulnds/proclib.c index 1cfb233e97ff2af09b444925e0b45960c4fcd0b7..af0745b72a6b81c4cc968dc03614d2c4f09accde 100644 --- a/lnet/ulnds/proclib.c +++ b/lnet/ulnds/proclib.c @@ -157,9 +157,6 @@ static void check_stopping(void *z) * We define a limit macro to place a ceiling on limits * for syntactic convenience */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - extern int tcpnal_init(bridge); nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; @@ -170,10 +167,8 @@ void *nal_thread(void *z) bridge b = args->nia_bridge; procbridge p=b->local; int rc; - ptl_pid_t pid_request; + ptl_process_id_t process_id; int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); b->nal_cb->nal_data=b; @@ -189,28 +184,21 @@ void *nal_thread(void *z) b->nal_cb->cb_callback=nal_callback; b->nal_cb->cb_dist=nal_dist; - pid_request = args->nia_requested_pid; - desired = *args->nia_limits; nal_type = args->nia_nal_type; - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - 
LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); + /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is + * about to do from the process_id passed to it...*/ + set_address(b,args->nia_requested_pid); + process_id.pid = b->nal_cb->ni.pid; + process_id.nid = b->nal_cb->ni.nid; + if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); /* initialize the generic 'library' level code */ - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); + rc = lib_init(b->nal_cb, process_id, + args->nia_requested_limits, + args->nia_actual_limits); /* * Whatever the initialization returned is passed back to the @@ -219,11 +207,11 @@ void *nal_thread(void *z) */ /* this should perform error checking */ pthread_mutex_lock(&p->mutex); - p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); - if (!rc) { + if (rc == PTL_OK) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. 
we overload this function to inform the api side that @@ -233,4 +221,3 @@ void *nal_thread(void *z) } return(0); } -#undef LIMIT diff --git a/lnet/ulnds/socklnd/bridge.h b/lnet/ulnds/socklnd/bridge.h index 9a90ab8d8dd705c2d7523d007a824c4c12d857ec..90ce3244fed2d900e63ed740e9d82f434db9c368 100644 --- a/lnet/ulnds/socklnd/bridge.h +++ b/lnet/ulnds/socklnd/bridge.h @@ -10,6 +10,12 @@ #define TCPNAL_PROCBRIDGE_H #include <portals/lib-p30.h> +#include <portals/nal.h> + +#define PTL_IFACE_TCP 1 +#define PTL_IFACE_ER 2 +#define PTL_IFACE_SS 3 +#define PTL_IFACE_MAX 4 typedef struct bridge { int alive; @@ -22,12 +28,6 @@ typedef struct bridge { } *bridge; -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - typedef int (*nal_initialize)(bridge); extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lnet/ulnds/socklnd/procapi.c b/lnet/ulnds/socklnd/procapi.c index 00a7ae4deb60cf4e1cf6d6f6b7a2cae5a33cc60f..e40c4b9c9b1413a7f0c7cbf994317376aa0e12a9 100644 --- a/lnet/ulnds/socklnd/procapi.c +++ b/lnet/ulnds/socklnd/procapi.c @@ -95,7 +95,7 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, * cleanup nal state, reclaim the lower side thread and * its state using PTL_FINI codepoint */ -static int procbridge_shutdown(nal_t *n, int ni) +static void procbridge_shutdown(nal_t *n) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; @@ -114,16 +114,6 @@ static int procbridge_shutdown(nal_t *n, int ni) } while (1); free(p); - return(0); -} - - -/* Function: validate - * useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, size_t extent) -{ - return(0); } @@ -187,18 +177,20 @@ static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds) return (milliseconds); } +/* forward decl */ +extern int procbridge_startup (nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); /* api_nal * the interface vector to allow the 
generic code to access * this nal. this is seperate from the library side nal_cb. * TODO: should be dyanmically allocated */ -static nal_t api_nal = { - ni: {0}, +nal_t procapi_nal = { nal_data: NULL, - forward: procbridge_forward, + startup: procbridge_startup, shutdown: procbridge_shutdown, - validate: procbridge_validate, + forward: procbridge_forward, yield: procbridge_yield, lock: procbridge_lock, unlock: procbridge_unlock @@ -206,7 +198,7 @@ static nal_t api_nal = { ptl_nid_t tcpnal_mynid; -/* Function: procbridge_interface +/* Function: procbridge_startup * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -214,40 +206,34 @@ ptl_nid_t tcpnal_mynid; * and effectively ignored * actual: limits actually allocated and returned * - * Returns: a pointer to my statically allocated top side NAL - * structure + * Returns: portals rc * * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) +int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { nal_init_args_t args; + procbridge p; bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; + /* XXX nal_type is purely private to tcpnal here */ int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - if(initialized) return (&api_nal); + LASSERT(nal == &procapi_nal); init_unix_timer(); b=(bridge)malloc(sizeof(struct bridge)); p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; + nal->nal_data=b; b->local=p; - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - args.nia_requested_pid = requested_pid; - args.nia_limits = &limits; + args.nia_requested_limits = requested_limits; + args.nia_actual_limits = actual_limits; args.nia_nal_type = 
nal_type; args.nia_bridge = b; @@ -259,19 +245,19 @@ nal_t *procbridge_interface(int num_interface, /* initialize notifier */ if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { perror("socketpair failed"); - return NULL; + return PTL_FAIL; } if (!register_io_handler(p->notifier[1], READ_HANDLER, procbridge_notifier_handler, p)) { perror("fail to register notifier handler"); - return NULL; + return PTL_FAIL; } /* create nal thread */ if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); - return(NULL); + return PTL_FAIL; } do { @@ -285,10 +271,9 @@ nal_t *procbridge_interface(int num_interface, } while (1); if (p->nal_flags & NAL_FLAG_STOPPED) - return (NULL); + return PTL_FAIL; b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - return (&api_nal); + return PTL_OK; } diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h index 965f83d6a0d2b3017ecbd7534bb7f1a5d4ee35bd..1c8e7dd87e10e9dccec6dc171186ac3f6b9b2a23 100644 --- a/lnet/ulnds/socklnd/procbridge.h +++ b/lnet/ulnds/socklnd/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lnet/ulnds/socklnd/proclib.c b/lnet/ulnds/socklnd/proclib.c index 1cfb233e97ff2af09b444925e0b45960c4fcd0b7..af0745b72a6b81c4cc968dc03614d2c4f09accde 100644 --- a/lnet/ulnds/socklnd/proclib.c +++ b/lnet/ulnds/socklnd/proclib.c @@ -157,9 +157,6 @@ static void check_stopping(void *z) * We define a limit macro to place a 
ceiling on limits * for syntactic convenience */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - extern int tcpnal_init(bridge); nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; @@ -170,10 +167,8 @@ void *nal_thread(void *z) bridge b = args->nia_bridge; procbridge p=b->local; int rc; - ptl_pid_t pid_request; + ptl_process_id_t process_id; int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); b->nal_cb->nal_data=b; @@ -189,28 +184,21 @@ void *nal_thread(void *z) b->nal_cb->cb_callback=nal_callback; b->nal_cb->cb_dist=nal_dist; - pid_request = args->nia_requested_pid; - desired = *args->nia_limits; nal_type = args->nia_nal_type; - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); + /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is + * about to do from the process_id passed to it...*/ + set_address(b,args->nia_requested_pid); + process_id.pid = b->nal_cb->ni.pid; + process_id.nid = b->nal_cb->ni.nid; + if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); /* initialize the generic 'library' level code */ - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); + rc = lib_init(b->nal_cb, process_id, + args->nia_requested_limits, + args->nia_actual_limits); /* * Whatever the initialization returned is passed back to the @@ -219,11 +207,11 @@ void *nal_thread(void *z) */ /* this should perform error checking */ pthread_mutex_lock(&p->mutex); - p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + p->nal_flags |= (rc != PTL_OK) ? 
NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); - if (!rc) { + if (rc == PTL_OK) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. we overload this function to inform the api side that @@ -233,4 +221,3 @@ void *nal_thread(void *z) } return(0); } -#undef LIMIT diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index fb031ae1b52070b5511587d573a8ed281d4a6920..e42fda69d2cd8fd369a1ab9c5f7a89af965f9ff7 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -61,7 +61,7 @@ unsigned int portal_debug; unsigned int portal_printk; unsigned int portal_stack; -unsigned int portal_cerror; +unsigned int portal_cerror = 1; static unsigned int g_nal = 0; diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 8c5e54d9da2465510bdf731dcd2d234183ce6fb1..198221cfd988875099d6db37562fe479cc7f681f 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -35,7 +35,6 @@ #include <linux/kp30.h> // #include <linux/obd.h> #include <portals/p30.h> -#include <portals/lib-types.h> /* FIXME (for PTL_MD_MAX_IOV) */ #include <linux/lustre_idl.h> #include <linux/lustre_ha.h> #include <linux/lustre_import.h> @@ -45,28 +44,42 @@ #define PTLRPC_MD_OPTIONS (PTL_MD_EVENT_START_DISABLE | \ PTL_MD_LUSTRE_COMPLETION_SEMANTICS) -/* Define some large-ish defaults for MTU and MAX_IOV if portals ones - * aren't defined (i.e. no limits) or too large */ -#if (defined(PTL_MTU) && (PTL_MTU <= (1 << 20))) -# define PTLRPC_MTU PTL_MTU +/* Define some large-ish maxima for bulk I/O + * CAVEAT EMPTOR, with multinet (i.e. gateways forwarding between networks) + * these limits are system wide and not interface-local. */ +#define PTLRPC_MAX_BRW_SIZE (1 << 20) +#define PTLRPC_MAX_BRW_PAGES 512 + +/* ...reduce to fit... 
*/ + +#if CRAY_PORTALS +/* include a cray header here if relevant + * NB liblustre SIZE/PAGES is affected too, but it merges contiguous + * chunks, so FTTB, it always used contiguous MDs */ #else -# define PTLRPC_MTU (1 << 20) +# include <portals/lib-types.h> #endif -#if (defined(PTL_MAX_IOV) && (PTL_MAX_IOV <= 512)) -# define PTLRPC_MAX_IOV PTL_MAX_IOV -#else -# define PTLRPC_MAX_IOV 512 + +#if (defined(PTL_MTU) && (PTL_MTU < PTLRPC_MAX_BRW_SIZE)) +# undef PTLRPC_MAX_BRW_SIZE +# define PTLRPC_MAX_BRW_SIZE PTL_MTU #endif +#if (defined(PTL_MD_MAX_IOV) && (PTL_MD_MAX_IOV < PTLRPC_MAX_BRW_PAGES )) +# undef PTLRPC_MAX_BRW_PAGES +# define PTLRPC_MAX_BRW_PAGES PTL_MD_MAX_IOV +#endif + +/* ...and make consistent... */ -/* Define consistent max bulk size/pages */ -#if (PTLRPC_MTU > PTLRPC_MAX_IOV * PAGE_SIZE) -# define PTLRPC_MAX_BRW_PAGES PTLRPC_MAX_IOV -# define PTLRPC_MAX_BRW_SIZE (PTLRPC_MAX_IOV * PAGE_SIZE) +#if (PTLRPC_MAX_BRW_SIZE > PTLRPC_MAX_BRW_PAGES * PAGE_SIZE) +# undef PTLRPC_MAX_BRW_SIZE +# define PTLRPC_MAX_BRW_SIZE (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE) #else -# define PTLRPC_MAX_BRW_PAGES (PTLRPC_MTU / PAGE_SIZE) -# define PTLRPC_MAX_BRW_SIZE PTLRPC_MTU +# undef PTLRPC_MAX_BRW_PAGES +# define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE / PAGE_SIZE) #endif + /* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request * buffers */ #define SVC_BUF_VMALLOC_THRESHOLD (2*PAGE_SIZE) @@ -416,7 +429,7 @@ struct ptlrpc_bulk_desc { #if (!CRAY_PORTALS && defined(__KERNEL__)) ptl_kiov_t bd_iov[0]; #else - struct iovec bd_iov[0]; + ptl_md_iovec_t bd_iov[0]; #endif }; @@ -506,6 +519,7 @@ extern void client_bulk_callback (ptl_event_t *ev); extern void request_in_callback(ptl_event_t *ev); extern void reply_out_callback(ptl_event_t *ev); extern void server_bulk_callback (ptl_event_t *ev); +extern int ptlrpc_default_nal(void); /* ptlrpc/connection.c */ void ptlrpc_dump_connections(void); diff --git a/lustre/include/linux/obd_class.h 
b/lustre/include/linux/obd_class.h index ebc2a0f9875af536525fcc9b8b8660bc0b845037..386c39392e410b191316f2cf0c54d9acf773f831 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -1058,7 +1058,7 @@ typedef __u8 class_uuid_t[16]; void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); /* lustre_peer.c */ -int lustre_uuid_to_peer(char *uuid, ptl_handle_ni_t *peer_ni, ptl_nid_t *peer_nid); +int lustre_uuid_to_peer(char *uuid, __u32 *peer_nal, ptl_nid_t *peer_nid); int class_add_uuid(char *uuid, __u64 nid, __u32 nal); int class_del_uuid (char *uuid); void class_init_uuidlist(void); diff --git a/lustre/kernel_patches/patches/bproc-patch-2.4.20 b/lustre/kernel_patches/patches/bproc-patch-2.4.20 index f081eb6edcce518cd07ee0f5d7c0955b7852a70d..e3cf679f58b4d9a3703564e41c8c11276d2c97da 100644 --- a/lustre/kernel_patches/patches/bproc-patch-2.4.20 +++ b/lustre/kernel_patches/patches/bproc-patch-2.4.20 @@ -1,4 +1,4 @@ -$Id: bproc-patch-2.4.20,v 1.8 2004/04/12 21:44:45 nic Exp $ +$Id: bproc-patch-2.4.20,v 1.9 2004/04/13 01:57:22 eeb Exp $ Index: linux/fs/exec.c =================================================================== @@ -764,7 +764,7 @@ Index: linux/kernel/bproc_hook.c + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * -+ * $Id: bproc-patch-2.4.20,v 1.8 2004/04/12 21:44:45 nic Exp $ ++ * $Id: bproc-patch-2.4.20,v 1.9 2004/04/13 01:57:22 eeb Exp $ + *-----------------------------------------------------------------------*/ +#include <linux/kernel.h> +#include <linux/sched.h> @@ -832,7 +832,7 @@ Index: linux/include/linux/bproc.h + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * -+ * $Id: bproc-patch-2.4.20,v 1.8 2004/04/12 21:44:45 nic Exp $ ++ * $Id: bproc-patch-2.4.20,v 1.9 2004/04/13 01:57:22 eeb Exp $ + 
*-----------------------------------------------------------------------*/ +#ifndef _LINUX_BPROC_H +#define _LINUX_BPROC_H diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index c02874414612974097c8ae5492f2602014232e08..f9fa7681de9d5c3525f810213a78f7fabeece6e9 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -55,23 +55,6 @@ ptl_handle_ni_t tcpnal_ni; struct task_struct *current; /* portals interfaces */ -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - switch (nal) - { - case SOCKNAL: - return &tcpnal_ni; - default: - return NULL; - } -} - -inline void -kportal_put_ni (int nal) -{ - return; -} struct ldlm_namespace; struct ldlm_res_id; @@ -145,19 +128,16 @@ int init_lib_portals() int rc; ENTRY; - PtlInit(&max_interfaces); - rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni); - if (rc != 0) { - CERROR("TCPNAL: PtlNIInit failed: error %d\n", rc); - PtlFini(); - RETURN (rc); + rc = PtlInit(&max_interfaces); + if (rc != PTL_OK) { + CERROR("PtlInit failed: %d\n", rc); + RETURN (-ENXIO); } - PtlNIDebug(tcpnal_ni, ~0); - RETURN(rc); + RETURN(0); } int -kportal_nal_cmd(struct portals_cfg *pcfg) +libcfs_nal_cmd(struct portals_cfg *pcfg) { /* handle portals command if we want */ return 0; @@ -225,7 +205,6 @@ int lllib_init(char *dumpfile) if (init_obdclass() || init_lib_portals() || ptlrpc_init() || - ldlm_init() || mdc_init() || lov_init() || osc_init()) @@ -266,6 +245,7 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) CERROR("Can't parse NID %s\n", g_zconf_mdsnid); RETURN(-EINVAL); } + nal = ptl_name2nal("tcp"); if (nal <= 0) { CERROR("Can't parse NAL tcp\n"); diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index 40d373147a5e8b9fb78d054e228f88014d2e9a79..59bbac7b684db30aae3581a3eeee1bc50f025124 100644 --- a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -70,46 +70,11 @@ struct pingcli_args { struct task_struct 
*current; -/* portals interfaces */ -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - switch (nal) - { - case SOCKNAL: - return &tcpnal_ni; - default: - return NULL; - } -} - -inline void -kportal_put_ni (int nal) -{ - return; -} - int -kportal_nal_cmd(struct portals_cfg *pcfg) +libcfs_nal_cmd(struct portals_cfg *pcfg) { -#if 0 - __u32 nal = pcfg->pcfg_nal; - int rc = -EINVAL; - - ENTRY; - - down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, - pcfg->pcfg_command); - rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); - } - up(&nal_cmd_sem); - RETURN(rc); -#else CERROR("empty function!!!\n"); return 0; -#endif } int init_current(int argc, char **argv) @@ -127,14 +92,11 @@ int init_lib_portals() int max_interfaces; int rc; - PtlInit(&max_interfaces); - rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni); + rc = PtlInit(&max_interfaces); if (rc != 0) { CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - PtlFini(); RETURN (rc); } - PtlNIDebug(tcpnal_ni, ~0); return rc; } @@ -349,7 +311,6 @@ int main(int argc, char **argv) if (init_current(argc, argv) || init_obdclass() || init_lib_portals() || ptlrpc_init() || - ldlm_init() || mdc_init() || lov_init() || osc_init() || diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 89dcec97f83658ddbda0307a35f3da609d27019e..09d7e71ac862dddf4f47d5f56df05d714da0f931 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -402,7 +402,7 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, PCFG_INIT(pcfg, NAL_CMD_REGISTER_MYNID); pcfg.pcfg_nal = lmd->lmd_nal; pcfg.pcfg_nid = lmd->lmd_local_nid; - err = kportal_nal_cmd(&pcfg); + err = libcfs_nal_cmd(&pcfg); if (err <0) GOTO(out, err); } @@ -415,7 +415,7 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, pcfg.pcfg_misc = lmd->lmd_port; pcfg.pcfg_size = 8388608; pcfg.pcfg_flags = 0x4; /*share*/ - err 
= kportal_nal_cmd(&pcfg); + err = libcfs_nal_cmd(&pcfg); if (err <0) GOTO(out, err); } @@ -506,7 +506,7 @@ out_del_conn: pcfg.pcfg_nid = lmd->lmd_server_nid; pcfg.pcfg_id = lmd->lmd_server_ipaddr; pcfg.pcfg_flags = 1; /*share*/ - err = kportal_nal_cmd(&pcfg); + err = libcfs_nal_cmd(&pcfg); if (err <0) GOTO(out, err); } diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index d2cca20a7b6dd4ca6a7a13bf0f51a24aafe6f705..d941ddac8acb2c550f0418991812b10fe904a9df 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -295,7 +295,7 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, int pattern) (*lsmp)->lsm_magic = LOV_MAGIC; (*lsmp)->lsm_stripe_count = stripe_count; (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count; - (*lsmp)->lsm_xfersize = PTLRPC_MTU * stripe_count; + (*lsmp)->lsm_xfersize = PTLRPC_MAX_BRW_SIZE * stripe_count; (*lsmp)->lsm_pattern = pattern; (*lsmp)->lsm_oinfo[0].loi_ost_idx = ~0; diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index b668bb7fa06a0ee2f6d1cbc76f017cbee14f20bf..7fa8003b9af0534193bfd945d5f77f6974d91928 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -297,10 +297,10 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) case OBD_IOC_CLOSE_UUID: { ptl_nid_t peer_nid; - ptl_handle_ni_t peer_ni; + __u32 peer_nal; CDEBUG(D_IOCTL, "closing all connections to uuid %s\n", data->ioc_inlbuf1); - lustre_uuid_to_peer(data->ioc_inlbuf1, &peer_ni, &peer_nid); + lustre_uuid_to_peer(data->ioc_inlbuf1, &peer_nal, &peer_nid); GOTO(out, err = 0); } diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c index 1e227e12b5bcb801586fe6e949cba60d7315a81e..8298fc32d5f83cd7e23dbff1ae660b4774fe4072 100644 --- a/lustre/obdclass/lustre_peer.c +++ b/lustre/obdclass/lustre_peer.c @@ -42,7 +42,6 @@ struct uuid_nid_data { ptl_nid_t nid; char *uuid; __u32 nal; - ptl_handle_ni_t ni; }; /* FIXME: This should probably become more elegant than a 
global linked list */ @@ -61,8 +60,7 @@ void class_exit_uuidlist(void) class_del_uuid(NULL); } -int lustre_uuid_to_peer(char *uuid, - ptl_handle_ni_t *peer_ni, ptl_nid_t *peer_nid) +int lustre_uuid_to_peer(char *uuid, __u32 *peer_nal, ptl_nid_t *peer_nid) { struct list_head *tmp; @@ -74,7 +72,7 @@ int lustre_uuid_to_peer(char *uuid, if (strcmp(data->uuid, uuid) == 0) { *peer_nid = data->nid; - *peer_ni = data->ni; + *peer_nal = data->nal; spin_unlock (&g_uuid_lock); return 0; @@ -87,7 +85,6 @@ int lustre_uuid_to_peer(char *uuid, int class_add_uuid(char *uuid, __u64 nid, __u32 nal) { - const ptl_handle_ni_t *nip; struct uuid_nid_data *data; int rc; int nob = strnlen (uuid, PAGE_SIZE) + 1; @@ -95,26 +92,21 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal) if (nob > PAGE_SIZE) return -EINVAL; - nip = kportal_get_ni (nal); - if (nip == NULL) { - CERROR("get_ni failed: is the NAL module loaded?\n"); - return -EIO; - } - rc = -ENOMEM; OBD_ALLOC(data, sizeof(*data)); if (data == NULL) - goto fail_0; + return -ENOMEM; OBD_ALLOC(data->uuid, nob); - if (data == NULL) - goto fail_1; + if (data == NULL) { + OBD_FREE(data, sizeof(*data)); + return -ENOMEM; + } CDEBUG(D_INFO, "add uuid %s "LPX64" %u\n", uuid, nid, nal); memcpy(data->uuid, uuid, nob); data->nid = nid; data->nal = nal; - data->ni = *nip; spin_lock (&g_uuid_lock); @@ -123,12 +115,6 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal) spin_unlock (&g_uuid_lock); return 0; - - fail_1: - OBD_FREE (data, sizeof (*data)); - fail_0: - kportal_put_ni (nal); - return (rc); } /* delete only one entry if uuid is specified, otherwise delete all */ @@ -164,7 +150,6 @@ int class_del_uuid (char *uuid) list_del (&data->head); - kportal_put_ni (data->nal); OBD_FREE(data->uuid, strlen(data->uuid) + 1); OBD_FREE(data, sizeof(*data)); } while (!list_empty (&deathrow)); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 
6359a3c98ed15daa46c5824f8c5cde28c21ef375..7d22ba276f105770622bcf918c5fc49cda28e668 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -601,7 +601,7 @@ static int class_config_llog_handler(struct llog_handle * handle, pcfg->pcfg_nid = cfg->cfg_local_nid; } - rc = kportal_nal_cmd(pcfg); + rc = libcfs_nal_cmd(pcfg); } out: RETURN(rc); diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index c4c650e9b5c2d7c63e194d8899f35ebb3637ffc7..3bbda8df3e769aa927a1f5784f0f799c6f11e3f7 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -301,6 +301,7 @@ extern void kportal_blockallsigs (void); #endif # include <unistd.h> # include <time.h> +# include <limits.h> # include <asm/types.h> # ifndef DEBUG_SUBSYSTEM # define DEBUG_SUBSYSTEM S_UNDEFINED @@ -433,40 +434,6 @@ struct portals_device_userstate * USER LEVEL STUFF BELOW */ -#define PORTALS_CFG_VERSION 0x00010001; - -struct portals_cfg { - __u32 pcfg_version; - __u32 pcfg_command; - - __u32 pcfg_nal; - __u32 pcfg_flags; - - __u32 pcfg_gw_nal; - __u64 pcfg_nid; - __u64 pcfg_nid2; - __u64 pcfg_nid3; - __u32 pcfg_id; - __u32 pcfg_misc; - __u32 pcfg_fd; - __u32 pcfg_count; - __u32 pcfg_size; - __u32 pcfg_wait; - - __u32 pcfg_plen1; /* buffers in userspace */ - char *pcfg_pbuf1; - __u32 pcfg_plen2; /* buffers in userspace */ - char *pcfg_pbuf2; -}; - -#define PCFG_INIT(pcfg, cmd) \ -do { \ - memset(&pcfg, 0, sizeof(pcfg)); \ - pcfg.pcfg_version = PORTALS_CFG_VERSION; \ - pcfg.pcfg_command = (cmd); \ - \ -} while (0) - #define PORTAL_IOCTL_VERSION 0x00010007 #define PING_SYNC 0 #define PING_ASYNC 1 @@ -675,17 +642,10 @@ enum { SCIMACNAL = 6, ROUTER = 7, IBNAL = 8, + CRAY_KB_ERNAL = 9, NAL_ENUM_END_MARKER }; -#ifdef __KERNEL__ -extern ptl_handle_ni_t kqswnal_ni; -extern ptl_handle_ni_t ksocknal_ni; -extern ptl_handle_ni_t kgmnal_ni; -extern ptl_handle_ni_t kibnal_ni; -extern ptl_handle_ni_t kscimacnal_ni; -#endif - #define 
PTL_NALFMT_SIZE 16 #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) @@ -711,10 +671,6 @@ enum { DEBUG_DAEMON_CONTINUE = 4, }; -/* module.c */ -typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private); -int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private); -int kportal_nal_unregister(int nal); enum cfg_record_type { PORTALS_CFG_TYPE = 1, @@ -722,10 +678,6 @@ enum cfg_record_type { }; typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); -int kportal_nal_cmd(struct portals_cfg *); - -ptl_handle_ni_t *kportal_get_ni (int nal); -void kportal_put_ni (int nal); #ifdef __CYGWIN__ # ifndef BITS_PER_LONG diff --git a/lustre/portals/include/linux/kpr.h b/lustre/portals/include/linux/kpr.h index 45b58fe6453e5e7f80249aa48ba0630a08a294af..51d2d2f7abc813052fecb32bcdc1ce3041221390 100644 --- a/lustre/portals/include/linux/kpr.h +++ b/lustre/portals/include/linux/kpr.h @@ -81,21 +81,6 @@ typedef struct { void *kpr_arg; } kpr_router_t; -/* Router's control interface (Kernel Portals Routing Control Interface) */ -typedef const struct { - int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_get_route)(int index, int *gateway_nal, - ptl_nid_t *gateway, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, - int *alive); - int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid, - int alive, time_t when); -} kpr_control_interface_t; - -extern kpr_control_interface_t kpr_control_interface; extern kpr_router_interface_t kpr_router_interface; static inline int diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h index ff517870210bbb60b90d1fec5a736d049c268d93..f33e56ffbb580905f51b61cf28772146408b12fb 100644 --- a/lustre/portals/include/linux/libcfs.h +++ b/lustre/portals/include/linux/libcfs.h @@ -164,6 +164,45 @@ do { \ #define EXIT do { } 
while (0) #endif +#define PORTALS_CFG_VERSION 0x00010001; + +struct portals_cfg { + __u32 pcfg_version; + __u32 pcfg_command; + + __u32 pcfg_nal; + __u32 pcfg_flags; + + __u32 pcfg_gw_nal; + __u64 pcfg_nid; + __u64 pcfg_nid2; + __u64 pcfg_nid3; + __u32 pcfg_id; + __u32 pcfg_misc; + __u32 pcfg_fd; + __u32 pcfg_count; + __u32 pcfg_size; + __u32 pcfg_wait; + + __u32 pcfg_plen1; /* buffers in userspace */ + char *pcfg_pbuf1; + __u32 pcfg_plen2; /* buffers in userspace */ + char *pcfg_pbuf2; +}; + +#define PCFG_INIT(pcfg, cmd) \ +do { \ + memset(&pcfg, 0, sizeof(pcfg)); \ + pcfg.pcfg_version = PORTALS_CFG_VERSION; \ + pcfg.pcfg_command = (cmd); \ + \ +} while (0) + +typedef int (nal_cmd_handler_fn)(struct portals_cfg *, void *); +int libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *arg); +int libcfs_nal_cmd(struct portals_cfg *pcfg); +void libcfs_nal_cmd_unregister(int nal); + struct portal_ioctl_data { __u32 ioc_len; __u32 ioc_version; @@ -196,6 +235,7 @@ struct portal_ioctl_data { char ioc_bulk[0]; }; + #ifdef __KERNEL__ #include <linux/list.h> diff --git a/lustre/portals/include/portals/api.h b/lustre/portals/include/portals/api.h index 69fa339b9759407bedf6097fbe4c3e93222b3715..6d382bb8abb0d21943c92de8dbbbb18c578705a8 100644 --- a/lustre/portals/include/portals/api.h +++ b/lustre/portals/include/portals/api.h @@ -9,9 +9,9 @@ int PtlInit(int *); void PtlFini(void); -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in, - ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid, - ptl_handle_ni_t * interface_out); +int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, + ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *interface_out); int PtlNIInitialized(ptl_interface_t); @@ -37,17 +37,6 @@ int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); #endif -/* - * PtlNIDebug: - * - * This is not an official Portals 3 API call. 
It is provided - * by the reference implementation to allow the maintainers an - * easy way to turn on and off debugging information in the - * library. Do not use it in code that is not intended for use - * with any version other than the portable reference library. - */ -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in); - /* * PtlNIFailNid * @@ -123,8 +112,8 @@ int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout, /* These should be called by users */ int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out); + ptl_eq_handler_t handler, + ptl_handle_eq_t *handle_out); int PtlEQFree(ptl_handle_eq_t eventq_in); int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out); diff --git a/lustre/portals/include/portals/arg-blocks.h b/lustre/portals/include/portals/arg-blocks.h index 0be8a3d53ebf41ded3980d0e250ded128c2ba64f..21e30d55ad6fd3ff58f9ea6866047c3a29f189d0 100644 --- a/lustre/portals/include/portals/arg-blocks.h +++ b/lustre/portals/include/portals/arg-blocks.h @@ -18,7 +18,7 @@ #define PTL_GETID 1 #define PTL_NISTATUS 2 #define PTL_NIDIST 3 -#define PTL_NIDEBUG 4 +// #define PTL_NIDEBUG 4 #define PTL_MEATTACH 5 #define PTL_MEINSERT 6 // #define PTL_MEPREPEND 7 @@ -205,7 +205,7 @@ typedef struct PtlEQAlloc_in { ptl_size_t count_in; void *base_in; int len_in; - int (*callback_in) (ptl_event_t * event); + ptl_eq_handler_t callback_in; } PtlEQAlloc_in; typedef struct PtlEQAlloc_out { diff --git a/lustre/portals/include/portals/errno.h b/lustre/portals/include/portals/errno.h index 499f32bd46c257c9531fba05aeca5e6f722db7a4..a98bfd94e550a9891c536769806c13956036fb6e 100644 --- a/lustre/portals/include/portals/errno.h +++ b/lustre/portals/include/portals/errno.h @@ -37,12 +37,11 @@ typedef enum { PTL_MD_NO_UPDATE = 18, PTL_FAIL = 19, - PTL_IOV_TOO_MANY = 20, - PTL_IOV_TOO_SMALL = 21, + PTL_IOV_INVALID = 20, - PTL_EQ_IN_USE = 22, + PTL_EQ_IN_USE = 21, - 
PTL_MAX_ERRNO = 23 + PTL_MAX_ERRNO = 22 } ptl_err_t; /* If you change these, you must update the string table in api-errno.c */ diff --git a/lustre/portals/include/portals/lib-dispatch.h b/lustre/portals/include/portals/lib-dispatch.h index 90ed4f5afb7fc85c82cb65d3407e2bed7cf327d1..610c776de3c0537d3d8c36b5128e44bb081cfcac 100644 --- a/lustre/portals/include/portals/lib-dispatch.h +++ b/lustre/portals/include/portals/lib-dispatch.h @@ -18,7 +18,6 @@ extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIDebug(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args, diff --git a/lustre/portals/include/portals/lib-p30.h b/lustre/portals/include/portals/lib-p30.h index 350447e85f39c1782150545621f9032aa0cd9fab..efa929cb07b053589e5298d60f2f46b6ec34e233 100644 --- a/lustre/portals/include/portals/lib-p30.h +++ b/lustre/portals/include/portals/lib-p30.h @@ -194,11 +194,11 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) int niov; if ((umd->options & PTL_MD_KIOV) != 0) { - niov = umd->niov; + niov = umd->length; size = offsetof(lib_md_t, md_iov.kiov[niov]); } else { niov = ((umd->options & PTL_MD_IOVEC) != 0) ? 
- umd->niov : 1; + umd->length : 1; size = offsetof(lib_md_t, md_iov.iov[niov]); } @@ -346,8 +346,9 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) return (lh_entry (lh, lib_me_t, me_lh)); } -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); +extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid, + ptl_ni_limits_t *desired_limits, + ptl_ni_limits_t *actual_limits); extern int lib_fini(nal_cb_t * cb); extern void lib_dispatch(nal_cb_t * cb, void *private, int index, void *arg_block, void *ret_block); diff --git a/lustre/portals/include/portals/lib-types.h b/lustre/portals/include/portals/lib-types.h index 40776a61d15d508567fdd581b7e3ea6d159a24bb..ef618c7bd7e2a2d30842a0d46a5020fb05207dc1 100644 --- a/lustre/portals/include/portals/lib-types.h +++ b/lustre/portals/include/portals/lib-types.h @@ -169,7 +169,7 @@ struct lib_eq_t { ptl_size_t size; ptl_event_t *base; int eq_refcount; - int (*event_callback) (ptl_event_t * event); + ptl_eq_handler_t event_callback; void *eq_addrkey; }; @@ -245,15 +245,11 @@ typedef struct { * extracted by masking with (PTL_COOKIE_TYPES - 1) */ typedef struct { - int up; - int refcnt; ptl_nid_t nid; ptl_pid_t pid; - int num_nodes; - unsigned int debug; lib_ptl_t tbl; - lib_ac_t ac; lib_counters_t counters; + ptl_ni_limits_t actual_limits; int ni_lh_hash_size; /* size of lib handle hash table */ struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ diff --git a/lustre/portals/include/portals/nal.h b/lustre/portals/include/portals/nal.h index 5b72046cd8b7be04fde8825759e99acde389bfe0..1f925c1685e2e0e1074db42edbc43fa935077bba 100644 --- a/lustre/portals/include/portals/nal.h +++ b/lustre/portals/include/portals/nal.h @@ -18,32 +18,29 @@ typedef struct nal_t nal_t; struct nal_t { - ptl_ni_t ni; - int refct; - void *nal_data; - int *timeout; /* for libp30api users */ - int (*forward) (nal_t * nal, int index, /* Function ID */ - 
void *args, size_t arg_len, void *ret, size_t ret_len); + int nal_refct; + void *nal_data; - int (*shutdown) (nal_t * nal, int interface); + int (*startup) (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *req, ptl_ni_limits_t *actual); + + void (*shutdown) (nal_t *nal); - int (*validate) (nal_t * nal, void *base, size_t extent); + int (*forward) (nal_t *nal, int index, /* Function ID */ + void *args, size_t arg_len, void *ret, size_t ret_len); - int (*yield) (nal_t * nal, unsigned long *flags, int milliseconds); + int (*yield) (nal_t *nal, unsigned long *flags, int milliseconds); - void (*lock) (nal_t * nal, unsigned long *flags); + void (*lock) (nal_t *nal, unsigned long *flags); - void (*unlock) (nal_t * nal, unsigned long *flags); + void (*unlock) (nal_t *nal, unsigned long *flags); }; -typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any); -#ifndef PTL_IFACE_DEFAULT -#define PTL_IFACE_DEFAULT (PTL_IFACE_IP) +#ifdef __KERNEL__ +extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal); +extern void ptl_unregister_nal(ptl_interface_t interface); #endif #endif diff --git a/lustre/portals/include/portals/nalids.h b/lustre/portals/include/portals/nalids.h index 1568593646f8430231ffd8246daae45bcb0384d6..55a991b70a4cef00fa18e144a218cc34c2f02c0e 100644 --- a/lustre/portals/include/portals/nalids.h +++ b/lustre/portals/include/portals/nalids.h @@ -1,6 +1,2 @@ #include "build_check.h" -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 diff --git a/lustre/portals/include/portals/p30.h b/lustre/portals/include/portals/p30.h index 577ffabbcac5554de668aba29524f5b8eba469d6..4b8631ddee86564f90a9779dea2d43f6ac052457 100644 --- 
a/lustre/portals/include/portals/p30.h +++ b/lustre/portals/include/portals/p30.h @@ -21,45 +21,6 @@ #endif #include <portals/types.h> -#include <portals/nal.h> #include <portals/api.h> -#include <portals/nalids.h> - -/* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. - */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... 
- */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) #endif diff --git a/lustre/portals/include/portals/types.h b/lustre/portals/include/portals/types.h index 902db764f072798b65ad680ddab0f738537770f5..51b557cc44b177237b786d1f7fdf328dd9cd0c55 100644 --- a/lustre/portals/include/portals/types.h +++ b/lustre/portals/include/portals/types.h @@ -41,7 +41,6 @@ typedef __u64 ptl_hdr_data_t; typedef __u32 ptl_size_t; #define PTL_TIME_FOREVER (-1) -#define PTL_EQ_HANDLER_NONE NULL typedef struct { unsigned long nal_idx; /* which network interface */ @@ -80,12 +79,6 @@ typedef enum { PTL_INS_AFTER } ptl_ins_pos_t; -typedef struct { - struct page *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; -} ptl_kiov_t; - typedef struct { void *start; ptl_size_t length; @@ -94,7 +87,6 @@ typedef struct { unsigned int options; void *user_ptr; ptl_handle_eq_t eventq; - unsigned int niov; } ptl_md_t; /* Options for the MD structure */ @@ -112,9 +104,19 @@ typedef struct { /* For compatibility with Cray Portals */ #define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 +#define PTL_MD_PHYS 0 #define PTL_MD_THRESH_INF (-1) +/* NB lustre portals uses struct iovec internally! 
*/ +typedef struct iovec ptl_md_iovec_t; + +typedef struct { + struct page *kiov_page; + unsigned int kiov_len; + unsigned int kiov_offset; +} ptl_kiov_t; + typedef enum { PTL_EVENT_GET_START, PTL_EVENT_GET_END, @@ -168,6 +170,9 @@ typedef enum { PTL_NOACK_REQ } ptl_ack_req_t; +typedef void (*ptl_eq_handler_t)(ptl_event_t *event); +#define PTL_EQ_HANDLER_NONE NULL + typedef struct { volatile ptl_seq_t sequence; ptl_size_t size; @@ -180,11 +185,14 @@ typedef struct { } ptl_ni_t; typedef struct { - int max_match_entries; /* max number of match entries */ - int max_mem_descriptors; /* max number of memory descriptors */ - int max_event_queues; /* max number of event queues */ - int max_atable_index; /* maximum access control list table index */ - int max_ptable_index; /* maximum portals table index */ + int max_mes; + int max_mds; + int max_eqs; + int max_ac_index; + int max_pt_index; + int max_md_iovecs; + int max_me_list; + int max_getput_md; } ptl_ni_limits_t; /* @@ -202,4 +210,7 @@ typedef enum { typedef int ptl_sr_value_t; +typedef int ptl_interface_t; +#define PTL_IFACE_DEFAULT (-1) + #endif diff --git a/lustre/portals/knals/gmnal/gmnal.h b/lustre/portals/knals/gmnal/gmnal.h index 995559944a5b9e2a842da3174840e76547da5543..e48552e7f214a10f78cb382215496fe6563baea0 100644 --- a/lustre/portals/knals/gmnal/gmnal.h +++ b/lustre/portals/knals/gmnal/gmnal.h @@ -190,7 +190,6 @@ typedef struct _gmnal_rxtwe { #define NRXTHREADS 10 /* max number of receiver threads */ typedef struct _gmnal_data_t { - int refcnt; spinlock_t cb_lock; spinlock_t stxd_lock; struct semaphore stxd_token; @@ -309,9 +308,12 @@ extern gmnal_data_t *global_nal_data; /* * API NAL */ +int gmnal_api_startup(nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); + int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); -int gmnal_api_shutdown(nal_t *, int); +void gmnal_api_shutdown(nal_t *); int gmnal_api_validate(nal_t *, void *, size_t); @@ -323,14 +325,13 @@ void 
gmnal_api_unlock(nal_t *, unsigned long *); #define GMNAL_INIT_NAL(a) do { \ + a->startup = gmnal_api_startup; \ a->forward = gmnal_api_forward; \ a->shutdown = gmnal_api_shutdown; \ - a->validate = NULL; \ a->yield = gmnal_api_yield; \ a->lock = gmnal_api_lock; \ a->unlock = gmnal_api_unlock; \ a->timeout = NULL; \ - a->refct = 1; \ a->nal_data = NULL; \ } while (0) @@ -373,7 +374,7 @@ void gmnal_cb_sti(nal_cb_t *, unsigned long *); int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); -nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); +int gmnal_init(void); void gmnal_fini(void); diff --git a/lustre/portals/knals/gmnal/gmnal_api.c b/lustre/portals/knals/gmnal/gmnal_api.c index 338d75cd094d1deddad1789f372465a618b5f104..7c94f937b56b8d79aca59a0420c746aa8777bdeb 100644 --- a/lustre/portals/knals/gmnal/gmnal_api.c +++ b/lustre/portals/knals/gmnal/gmnal_api.c @@ -123,18 +123,51 @@ gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, /* * gmnal_api_shutdown + * nal_refct == 0 => called on last matching PtlNIFini() * Close down this interface and free any resources associated with it * nal_t nal our nal to shutdown */ -int +void gmnal_api_shutdown(nal_t *nal, int interface) { + gmnal_data_t *nal_data; + nal_cb_t *nal_cb; - gmnal_data_t *nal_data = nal->nal_data; - + if (nal->nal_refct != 0) + return; + CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); - return(PTL_OK); + LASSERT(nal == global_nal_data->nal); + nal_data = nal->nal_data; + LASSERT(nal_data == global_nal_data); + nal_cb = nal_data->nal_cb; + + /* Stop portals calling our ioctl handler */ + libcfs_nal_cmd_unregister(GMNAL); + + /* XXX for shutdown "under fire" we probably need to set a shutdown + * flag so when lib calls us we fail immediately and dont queue any + * more work but our threads can still call into lib OK. 
THEN + * shutdown our threads, THEN lib_fini() */ + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_txd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + if (nal_data->sysctl) + unregister_sysctl_table (nal_data->sysctl); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + + global_nal_data = NULL; + PORTAL_MODULE_UNUSE; } @@ -210,57 +243,54 @@ gmnal_api_unlock(nal_t *nal, unsigned long *flags) } -nal_t * -gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t rpid) +int +gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - nal_t *nal = NULL; nal_cb_t *nal_cb = NULL; gmnal_data_t *nal_data = NULL; gmnal_srxd_t *srxd = NULL; gm_status_t gm_status; unsigned int local_nid = 0, global_nid = 0; - ptl_nid_t portals_nid; - ptl_pid_t portals_pid = 0; + ptl_process_id_t process_id; + + if (nal->nal_refct != 0) { + if (actual_limits != NULL) { + nal_data = (gmnal_data_t *)nal->nal_data; + nal_cb = nal_data->nal_cb; + *actual_limits = nal->_cb->ni.actual_limits; + return (PTL_OK); + } + /* Called on first PtlNIInit() */ - CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], " - "ac_size[%d]\n", interface, ptl_size, ac_size); + CDEBUG(D_TRACE, "startup\n"); + LASSERT(global_nal_data == NULL); PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t)); if (!nal_data) { CDEBUG(D_ERROR, "can't get memory\n"); - return(NULL); + return(PTL_NO_SPACE); } memset(nal_data, 0, sizeof(gmnal_data_t)); /* * set the small message buffer size */ - nal_data->refcnt = 1; CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data); CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size); - PORTAL_ALLOC(nal, sizeof(nal_t)); - if (!nal) { - PORTAL_FREE(nal_data, 
sizeof(gmnal_data_t)); - return(NULL); - } - memset(nal, 0, sizeof(nal_t)); - CDEBUG(D_INFO, "Allocd and reset nal[%p]\n", nal); - PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); if (!nal_cb) { - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - return(NULL); + return(PTL_NO_SPACE); } memset(nal_cb, 0, sizeof(nal_cb_t)); CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb); - GMNAL_INIT_NAL(nal); GMNAL_INIT_NAL_CB(nal_cb); /* * String them all together @@ -280,10 +310,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, CDEBUG(D_INFO, "Calling gm_init\n"); if (gm_init() != GM_SUCCESS) { CDEBUG(D_ERROR, "call to gm_init failed\n"); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -326,10 +355,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, GMNAL_GM_LOCK(nal_data); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -344,10 +372,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -374,10 +401,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } gmnal_start_kernel_threads(nal_data); @@ -407,10 +433,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); 
GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } nal_data->gm_local_nid = local_nid; CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid); @@ -428,10 +453,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid); nal_data->gm_global_nid = global_nid; @@ -440,13 +464,15 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, /* pid = gm_getpid(); */ - CDEBUG(D_INFO, "portals_pid is [%u]\n", portals_pid); - portals_nid = (unsigned long)global_nid; - CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", portals_nid); + process_id.pid = 0; + process_id.nid = global_nid; + + CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); + CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid); CDEBUG(D_PORTALS, "calling lib_init\n"); - if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, - ac_size) != PTL_OK) { + if (lib_init(nal_cb, process_id, + requested_limits, actual_limits) != PTL_OK) { CDEBUG(D_ERROR, "lib_init failed\n"); gmnal_stop_rxthread(nal_data); gmnal_stop_ctthread(nal_data); @@ -456,22 +482,68 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } + + if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, nal->nal_data) != 0) { + CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n"); + + /* XXX these cleanup cases should be restructured to + * minimise 
duplication... */ + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_txd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(PTL_FAIL); + } + + /* might be better to initialise this at module load rather than in + * NAL startup */ nal_data->sysctl = NULL; nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0); CDEBUG(D_INFO, "gmnal_init finished\n"); global_nal_data = nal->nal_data; - return(nal); + + /* no unload now until shutdown */ + PORTAL_MODULE_USE; + + return(PTL_OK); } +nal_t the_gm_nal; + +/* + * Called when module loaded + */ +int gmnal_init(void) +{ + int rc; + + memset(&the_gm_nal, 0, sizeof(nal_t)); + CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal); + GMNAL_INIT_NAL(&the_gm_nal); + rc = ptl_register_nal(GMNAL, &the_gm_nal); + if (rc != PTL_OK) + CERROR("Can't register GMNAL: %d\n", rc); + + return (rc); +} + + /* * Called when module removed @@ -484,20 +556,7 @@ void gmnal_fini() CDEBUG(D_TRACE, "gmnal_fini\n"); - PtlNIFini(kgmnal_ni); - lib_fini(nal_cb); + LASSERT(global_nal_data == NULL); - gmnal_stop_rxthread(nal_data); - gmnal_stop_ctthread(nal_data); - gmnal_free_txd(nal_data); - gmnal_free_srxd(nal_data); - GMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - GMNAL_GM_UNLOCK(nal_data); - if (nal_data->sysctl) - unregister_sysctl_table (nal_data->sysctl); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + ptl_unregister_nal(GMNAL); } diff --git a/lustre/portals/knals/gmnal/gmnal_module.c b/lustre/portals/knals/gmnal/gmnal_module.c index 31f6819dabd2de3b5c55198d04f7f93f44f52fc1..278230e66edd8c14dc5ce602d33d3893a55d98a0 100644 --- a/lustre/portals/knals/gmnal/gmnal_module.c +++ 
b/lustre/portals/knals/gmnal/gmnal_module.c @@ -32,9 +32,6 @@ int num_rx_threads = -1; int num_stxds = 5; int gm_port = 4; -ptl_handle_ni_t kgmnal_ni; - - int gmnal_cmd(struct portals_cfg *pcfg, void *private) { @@ -92,26 +89,15 @@ gmnal_load(void) CDEBUG(D_INFO, "Calling gmnal_init\n"); - status = PtlNIInit(gmnal_init, 32, 4, 0, &kgmnal_ni); + status = gmnal_init(); if (status == PTL_OK) { - CDEBUG(D_INFO, "Portals GMNAL initialised ok kgmnal_ni\n"); + CDEBUG(D_INFO, "Portals GMNAL initialised ok\n"); } else { CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n"); - return(1); + return(-ENODEV); } - CDEBUG(D_INFO, "Calling kportal_nal_register\n"); - /* - * global_nal_data is set by gmnal_init - */ - if (kportal_nal_register(GMNAL, &gmnal_cmd, global_nal_data) != 0) { - CDEBUG(D_INFO, "kportal_nal_register failed\n"); - return(1); - } - - CDEBUG(D_INFO, "Calling PORTAL_SYMBOL_REGISTER\n"); - PORTAL_SYMBOL_REGISTER(kgmnal_ni); CDEBUG(D_INFO, "This is the end of the gmnal init routine"); @@ -122,11 +108,7 @@ gmnal_load(void) static void __exit gmnal_unload(void) { - - kportal_nal_unregister(GMNAL); - PORTAL_SYMBOL_UNREGISTER(kgmnal_ni); gmnal_fini(); - global_nal_data = NULL; return; } @@ -135,8 +117,6 @@ module_init(gmnal_load); module_exit(gmnal_unload); -EXPORT_SYMBOL(kgmnal_ni); - MODULE_PARM(gmnal_small_msg_size, "i"); MODULE_PARM(num_rx_threads, "i"); MODULE_PARM(num_stxds, "i"); diff --git a/lustre/portals/knals/ibnal/ibnal.c b/lustre/portals/knals/ibnal/ibnal.c index 02beca7fb3dbb285fc7bbc6e036c786bb0c10b3e..86c2a6355206923d845a8321c343316a9ba7eb7b 100644 --- a/lustre/portals/knals/ibnal/ibnal.c +++ b/lustre/portals/knals/ibnal/ibnal.c @@ -235,11 +235,6 @@ kibnal_init(int interface, // no use here kibnal_data_t *nal_data = NULL; int rc; - unsigned int nnids = 1; // number of nids - // do we know how many nodes are in this - // system related to this kib_nid - // - CDEBUG(D_NET, "kibnal_init:calling lib_init with nid 0x%u\n", kibnal_data.kib_nid); @@ -252,7 
+247,6 @@ kibnal_init(int interface, // no use here rc = lib_init(&kibnal_lib, kibnal_data.kib_nid, 0, // process id is set as 0 - nnids, ptl_size, ac_size); @@ -2034,16 +2028,13 @@ kibnal_initialize(void) CDEBUG(D_PORTALS, "kibnal_initialize: Enter kibnal_initialize\n"); // set api functional pointers + kibnal_api.startup = kibnal_startup; kibnal_api.forward = kibnal_forward; kibnal_api.shutdown = kibnal_shutdown; kibnal_api.yield = kibnal_yield; - kibnal_api.validate = NULL; /* our api validate is a NOOP */ kibnal_api.lock = kibnal_lock; kibnal_api.unlock = kibnal_unlock; kibnal_api.nal_data = &kibnal_data; // this is so called private data - kibnal_api.refct = 1; - kibnal_api.timeout = NULL; - kibnal_lib.nal_data = &kibnal_data; memset(&kibnal_data, 0, sizeof(kibnal_data)); diff --git a/lustre/portals/knals/qswnal/Makefile.in b/lustre/portals/knals/qswnal/Makefile.in index 7defd133256955ddf951ecaeb4cbfb4fc1e08131..17b17789ea11de01b5af7deb35f31e487e8a48ef 100644 --- a/lustre/portals/knals/qswnal/Makefile.in +++ b/lustre/portals/knals/qswnal/Makefile.in @@ -1,6 +1,6 @@ MODULES := kqswnal kqswnal-objs := qswnal.o qswnal_cb.o -EXTRA_CFLAGS := @QSWCPPFLAGS@ +EXTRA_CFLAGS := @QSWCPPFLAGS@ -I /usr/include @INCLUDE_RULES@ diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c index aeadd318638bb0e0ba50d9064978a0a169b73615..5359ef7590da6af80b4f62fdd27afaa355efd2ec 100644 --- a/lustre/portals/knals/qswnal/qswnal.c +++ b/lustre/portals/knals/qswnal/qswnal.c @@ -24,9 +24,10 @@ #include "qswnal.h" -ptl_handle_ni_t kqswnal_ni; nal_t kqswnal_api; kqswnal_data_t kqswnal_data; +ptl_handle_ni_t kqswnal_ni; +kqswnal_tunables_t kqswnal_tunables; kpr_nal_interface_t kqswnal_router_interface = { kprni_nalid: QSWNAL, @@ -43,10 +44,7 @@ kpr_nal_interface_t kqswnal_router_interface = { static ctl_table kqswnal_ctl_table[] = { {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets", - &kqswnal_data.kqn_optimized_gets, sizeof (int), - 0644, NULL, &proc_dointvec}, 
- {QSWNAL_SYSCTL_COPY_SMALL_FWD, "copy_small_fwd", - &kqswnal_data.kqn_copy_small_fwd, sizeof (int), + &kqswnal_tunables.kqn_optimized_gets, sizeof (int), 0644, NULL, &proc_dointvec}, {0} }; @@ -100,15 +98,6 @@ kqswnal_unlock(nal_t *nal, unsigned long *flags) nal_cb->cb_sti(nal_cb,flags); } -static int -kqswnal_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "shutdown\n"); - - LASSERT (nal == &kqswnal_api); - return (0); -} - static int kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds) { @@ -148,20 +137,6 @@ kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds) return (milliseconds); } -static nal_t * -kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t requested_pid) -{ - ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); - int nnids = kqswnal_data.kqn_nnodes; - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids); - - lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); - - return (&kqswnal_api); -} - int kqswnal_get_tx_desc (struct portals_cfg *pcfg) { @@ -219,11 +194,20 @@ kqswnal_cmd (struct portals_cfg *pcfg, void *private) } } -void __exit -kqswnal_finalise (void) +static void +kqswnal_shutdown(nal_t *nal) { unsigned long flags; - int do_ptl_fini = 0; + int do_lib_fini = 0; + + /* NB The first ref was this module! 
*/ + if (nal->nal_refct != 0) { + PORTAL_MODULE_UNUSE; + return; + } + + CDEBUG (D_NET, "shutdown\n"); + LASSERT (nal == &kqswnal_api); switch (kqswnal_data.kqn_init) { @@ -231,16 +215,11 @@ kqswnal_finalise (void) LASSERT (0); case KQN_INIT_ALL: -#if CONFIG_SYSCTL - if (kqswnal_data.kqn_sysctl != NULL) - unregister_sysctl_table (kqswnal_data.kqn_sysctl); -#endif - PORTAL_SYMBOL_UNREGISTER (kqswnal_ni); - kportal_nal_unregister(QSWNAL); + libcfs_nal_cmd_unregister(QSWNAL); /* fall through */ - case KQN_INIT_PTL: - do_ptl_fini = 1; + case KQN_INIT_LIB: + do_lib_fini = 1; /* fall through */ case KQN_INIT_DATA: @@ -353,10 +332,8 @@ kqswnal_finalise (void) kpr_deregister (&kqswnal_data.kqn_router); - if (do_ptl_fini) { - PtlNIFini (kqswnal_ni); + if (do_lib_fini) lib_fini (&kqswnal_lib); - } /**********************************************************************/ /* Unmap message buffers and free all descriptors and buffers @@ -477,7 +454,9 @@ kqswnal_finalise (void) } static int __init -kqswnal_initialise (void) +kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { #if MULTIRAIL_EKC EP_RAILMASK all_rails = EP_RAILMASK_ALL; @@ -487,22 +466,21 @@ kqswnal_initialise (void) int rc; int i; int elan_page_idx; + ptl_process_id_t my_process_id; int pkmem = atomic_read(&portal_kmemory); + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = kqswnal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } + LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING); CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); - kqswnal_api.forward = kqswnal_forward; - kqswnal_api.shutdown = kqswnal_shutdown; - kqswnal_api.yield = kqswnal_yield; - kqswnal_api.validate = NULL; /* our api validate is a NOOP */ - kqswnal_api.lock = kqswnal_lock; - kqswnal_api.unlock = kqswnal_unlock; - kqswnal_api.nal_data = &kqswnal_data; - - 
kqswnal_lib.nal_data = &kqswnal_data; - memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success)); memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed)); #if MULTIRAIL_EKC @@ -513,9 +491,6 @@ kqswnal_initialise (void) /* ensure all pointers NULL etc */ memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - kqswnal_data.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; - kqswnal_data.kqn_copy_small_fwd = KQSW_COPY_SMALL_FWD; - kqswnal_data.kqn_cb = &kqswnal_lib; INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); @@ -537,18 +512,19 @@ kqswnal_initialise (void) /* pointers/lists/locks initialised */ kqswnal_data.kqn_init = KQN_INIT_DATA; - + #if MULTIRAIL_EKC kqswnal_data.kqn_ep = ep_system(); if (kqswnal_data.kqn_ep == NULL) { CERROR("Can't initialise EKC\n"); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) { CERROR("Can't get elan ID\n"); - kqswnal_finalise(); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } #else /**********************************************************************/ @@ -558,7 +534,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_ep == NULL) { CERROR ("Can't get elan device 0\n"); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } #endif @@ -573,8 +550,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_eptx == NULL) { CERROR ("Can't allocate transmitter\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /**********************************************************************/ @@ -586,8 +563,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_eprx_small == NULL) { CERROR ("Can't install small msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep, @@ -596,8 +573,8 @@ kqswnal_initialise (void) 
if (kqswnal_data.kqn_eprx_large == NULL) { CERROR ("Can't install large msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /**********************************************************************/ @@ -611,8 +588,8 @@ kqswnal_initialise (void) EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve tx dma space\n"); - kqswnal_finalise(); - return (-ENOMEM); + kqswnal_shutdown(&kqswnal_api); + return (PTL_NO_SPACE); } #else dmareq.Waitfn = DDI_DMA_SLEEP; @@ -626,8 +603,8 @@ kqswnal_initialise (void) if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } #endif /**********************************************************************/ @@ -640,8 +617,8 @@ kqswnal_initialise (void) EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve rx dma space\n"); - kqswnal_finalise(); - return (-ENOMEM); + kqswnal_shutdown(&kqswnal_api); + return (PTL_NO_SPACE); } #else dmareq.Waitfn = DDI_DMA_SLEEP; @@ -656,8 +633,8 @@ kqswnal_initialise (void) if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } #endif /**********************************************************************/ @@ -667,8 +644,8 @@ kqswnal_initialise (void) sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); if (kqswnal_data.kqn_txds == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /* clear flags, null pointers etc */ @@ -683,8 +660,8 @@ kqswnal_initialise (void) PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); if (ktx->ktx_buffer == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /* Map pre-allocated buffer NOW, to save latency 
on transmit */ @@ -720,8 +697,8 @@ kqswnal_initialise (void) sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE)); if (kqswnal_data.kqn_rxds == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */ @@ -755,8 +732,8 @@ kqswnal_initialise (void) struct page *page = alloc_page(GFP_KERNEL); if (page == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } krx->krx_kiov[j].kiov_page = page; @@ -800,15 +777,19 @@ kqswnal_initialise (void) /**********************************************************************/ /* Network interface ready to initialise */ - rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni); - if (rc != 0) + my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); + my_process_id.pid = 0; + + rc = lib_init(&kqswnal_lib, my_process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { - CERROR ("PtlNIInit failed %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); + CERROR ("lib_init failed %d\n", rc); + kqswnal_shutdown (&kqswnal_api); + return (rc); } - kqswnal_data.kqn_init = KQN_INIT_PTL; + kqswnal_data.kqn_init = KQN_INIT_LIB; /**********************************************************************/ /* Queue receives, now that it's OK to run their completion callbacks */ @@ -829,8 +810,8 @@ kqswnal_initialise (void) if (rc != EP_SUCCESS) { CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } } @@ -842,8 +823,8 @@ kqswnal_initialise (void) if (rc != 0) { CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_finalise (); - return (rc); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } } @@ -852,19 +833,13 @@ kqswnal_initialise (void) rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); CDEBUG(D_NET, "Can't 
initialise routing interface (rc = %d): not routing\n",rc); - rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL); + rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - kqswnal_finalise (); - return (rc); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } -#if CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - kqswnal_data.kqn_sysctl = register_sysctl_table (kqswnal_top_ctl_table, 0); -#endif - - PORTAL_SYMBOL_REGISTER(kqswnal_ni); kqswnal_data.kqn_init = KQN_INIT_ALL; printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d " @@ -873,9 +848,61 @@ kqswnal_initialise (void) kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled", pkmem); - return (0); + return (PTL_OK); } +void __exit +kqswnal_finalise (void) +{ +#if CONFIG_SYSCTL + if (kqswnal_tunables.kqn_sysctl != NULL) + unregister_sysctl_table (kqswnal_tunables.kqn_sysctl); +#endif + PtlNIFini(kqswnal_ni); + + ptl_unregister_nal(QSWNAL); +} + +static int __init +kqswnal_initialise (void) +{ + int rc; + + kqswnal_api.startup = kqswnal_startup; + kqswnal_api.shutdown = kqswnal_shutdown; + kqswnal_api.forward = kqswnal_forward; + kqswnal_api.yield = kqswnal_yield; + kqswnal_api.lock = kqswnal_lock; + kqswnal_api.unlock = kqswnal_unlock; + kqswnal_api.nal_data = &kqswnal_data; + + kqswnal_lib.nal_data = &kqswnal_data; + + /* Initialise dynamic tunables to defaults once only */ + kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; + + rc = ptl_register_nal(QSWNAL, &kqswnal_api); + if (rc != PTL_OK) { + CERROR("Can't register QSWNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways, and the workaround for 'EKC blocks forever until + * the service is active' want the NAL started up at module load + * time... 
*/ + rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(QSWNAL); + return (-ENODEV); + } + +#if CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + kqswnal_tunables.kqn_sysctl = + register_sysctl_table (kqswnal_top_ctl_table, 0); +#endif + return (0); +} MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01"); @@ -883,5 +910,3 @@ MODULE_LICENSE("GPL"); module_init (kqswnal_initialise); module_exit (kqswnal_finalise); - -EXPORT_SYMBOL (kqswnal_ni); diff --git a/lustre/portals/knals/qswnal/qswnal.h b/lustre/portals/knals/qswnal/qswnal.h index 93bf584477ad06269011a273ae56539b4e20f47b..1cd42db9396b323de5dd83452925d42f3644d63b 100644 --- a/lustre/portals/knals/qswnal/qswnal.h +++ b/lustre/portals/knals/qswnal/qswnal.h @@ -74,6 +74,7 @@ #include <linux/kpr.h> #include <portals/p30.h> #include <portals/lib-p30.h> +#include <portals/nal.h> #define KQSW_CHECKSUM 0 #if KQSW_CHECKSUM @@ -192,18 +193,21 @@ typedef struct #define KTX_FORWARDING 2 /* routing a packet */ #define KTX_GETTING 3 /* local optimised get */ +typedef struct +{ + /* dynamic tunables... */ + int kqn_optimized_gets; /* optimized GETs? */ +#if CONFIG_SYSCTL + struct ctl_table_header *kqn_sysctl; /* sysctl interface */ +#endif +} kqswnal_tunables_t; + typedef struct { char kqn_init; /* what's been initialised */ char kqn_shuttingdown; /* I'm trying to shut down */ atomic_t kqn_nthreads; /* # threads running */ - int kqn_optimized_gets; /* optimized GETs? */ - int kqn_copy_small_fwd; /* fwd small msgs from pre-allocated buffer? 
*/ - -#if CONFIG_SYSCTL - struct ctl_table_header *kqn_sysctl; /* sysctl interface */ -#endif kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */ kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ @@ -247,12 +251,13 @@ typedef struct /* kqn_init state */ #define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ #define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 +#define KQN_INIT_LIB 2 #define KQN_INIT_ALL 3 -extern nal_cb_t kqswnal_lib; -extern nal_t kqswnal_api; -extern kqswnal_data_t kqswnal_data; +extern nal_cb_t kqswnal_lib; +extern nal_t kqswnal_api; +extern kqswnal_tunables_t kqswnal_tunables; +extern kqswnal_data_t kqswnal_data; /* global pre-prepared replies to keep off the stack */ extern EP_STATUSBLK kqswnal_rpc_success; diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c index 577c578018f903ebb5ab9ec6689f812ee504030b..f92f97474d624d758f1ae45d5a3d440ba074b3a3 100644 --- a/lustre/portals/knals/qswnal/qswnal_cb.c +++ b/lustre/portals/knals/qswnal/qswnal_cb.c @@ -1027,7 +1027,7 @@ kqswnal_sendmsg (nal_cb_t *nal, memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum)); #endif - if (kqswnal_data.kqn_optimized_gets && + if (kqswnal_tunables.kqn_optimized_gets && type == PTL_MSG_GET && /* doing a GET */ nid == targetnid) { /* not forwarding */ lib_md_t *md = libmsg->md; diff --git a/lustre/portals/knals/scimacnal/scimacnal.c b/lustre/portals/knals/scimacnal/scimacnal.c index 35de6ebcf77e718850867d94e38d7eeb04cf08bc..e77bd8ee278dc69428c1e5d1525825e6907dfab5 100644 --- a/lustre/portals/knals/scimacnal/scimacnal.c +++ b/lustre/portals/knals/scimacnal/scimacnal.c @@ -26,7 +26,6 @@ #include "scimacnal.h" -ptl_handle_ni_t kscimacnal_ni; nal_t kscimacnal_api; kscimacnal_data_t kscimacnal_data; @@ -101,10 +100,34 @@ static void kscimacnal_unlock(nal_t *nal, unsigned long *flags) } -static int kscimacnal_shutdown(nal_t *nal, int ni) +static void kscimacnal_shutdown(nal_t *nal, 
int ni) { LASSERT (nal == &kscimacnal_api); - return 0; + LASSERT (kscimacnal_data.ksci_init); + + if (nal->nal_refct != 0) + return; + + /* Called on last matching PtlNIFini() */ + + /* FIXME: How should the shutdown procedure really look? + */ + kscimacnal_data.ksci_shuttingdown=1; + + /* Stop handling ioctls */ + libcfs_nal_cmd_unregister(SCIMACNAL); + + mac_finish(kscimacnal_data.ksci_machandle); + + /* finalise lib after net shuts up */ + lib_fini(&kscimacnal_lib); + + kscimacnal_data.ksci_init = 0; + + /* Allow unload */ + PORTAL_MODULE_UNUSE; + + return; } @@ -123,56 +146,26 @@ static void kscimacnal_yield( nal_t *nal, unsigned long *flags, int milliseconds } -static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - int nnids = 512; /* FIXME: Need ScaMac funktion to get #nodes */ - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" nnids %d\n", kscimacnal_data.ksci_nid, nnids); - lib_init(&kscimacnal_lib, kscimacnal_data.ksci_nid, 0, nnids,ptl_size, ac_size); - return &kscimacnal_api; -} - - -/* Called by kernel at module unload time */ -static void /*__exit*/ -kscimacnal_finalize(void) -{ - /* FIXME: How should the shutdown procedure really look? 
*/ - kscimacnal_data.ksci_shuttingdown=1; - - PORTAL_SYMBOL_UNREGISTER(kscimacnal_ni); - - PtlNIFini(kscimacnal_ni); - lib_fini(&kscimacnal_lib); - - mac_finish(kscimacnal_data.ksci_machandle); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - - -/* Called by kernel at module insertion time */ -static int __init -kscimacnal_initialize(void) +static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { int rc; - unsigned long nid=0; + mac_physaddr_t mac_physaddr; + ptl_process_id_t process_id; mac_handle_t *machandle = NULL; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = kscimacnal_lib.ni.actual_limits; + return (PTL_OK); + } - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kscimacnal_api.forward = kscimacnal_forward; - kscimacnal_api.shutdown = kscimacnal_shutdown; - kscimacnal_api.yield = kscimacnal_yield; - kscimacnal_api.validate = NULL; /* our api validate is a NOOP */ - kscimacnal_api.lock= kscimacnal_lock; - kscimacnal_api.unlock= kscimacnal_unlock; - kscimacnal_api.nal_data = &kscimacnal_data; + /* Called on first PtlNIInit(SCIMACNAL) */ + LASSERT (nal == kscimacnal_api); + LASSERT (!kscimacnal_data.ksci_init); + kscimacnal_lib.nal_data = &kscimacnal_data; memset(&kscimacnal_data, 0, sizeof(kscimacnal_data)); @@ -188,7 +181,7 @@ kscimacnal_initialize(void) if(!machandle) { CERROR("mac_init() failed\n"); - return -1; + return PTL_FAIL; } kscimacnal_data.ksci_machandle = machandle; @@ -199,45 +192,88 @@ kscimacnal_initialize(void) mac_get_mtusize(machandle), SCIMACNAL_MTU); CERROR("Consult README.scimacnal for more information\n"); mac_finish(machandle); - return -1; + return PTL_FAIL; } /* Get the node ID */ /* mac_get_physaddrlen() is a function instead of define, sigh */ - LASSERT(mac_get_physaddrlen(machandle) <= sizeof(nid)); - if(mac_get_physaddr(machandle, (mac_physaddr_t *) &nid)) 
{ + LASSERT(mac_get_physaddrlen(machandle) <= sizeof(mac_physaddr)); + if(mac_get_physaddr(machandle, &mac_physaddr)) { CERROR("mac_get_physaddr() failed\n"); mac_finish(machandle); - return -1; + return PTL_FAIL; } - nid = ntohl(nid); - kscimacnal_data.ksci_nid = nid; + kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr)); + process_id.pid = 0; + process_id.nid = kscimacnal_data.ksci_nid; - /* Initialize Network Interface */ - /* FIXME: What do the magic numbers mean? Documentation anyone? */ - rc = PtlNIInit(kscimacnal_init, 32, 4, 0, &kscimacnal_ni); - if (rc) { + CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", + kscimacnal_data.ksci_nid); + + rc = lib_init(&kscimacnal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { CERROR("PtlNIInit failed %d\n", rc); mac_finish(machandle); - return (-ENOMEM); + return (rc); } /* Init command interface */ - rc = kportal_nal_register (SCIMACNAL, &kscimacnal_cmd, NULL); + rc = libcfs_nal_cmd_register (SCIMACNAL, &kscimacnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - PtlNIFini(kscimacnal_ni); + lib_fini(&kscimacnal_lib); mac_finish(machandle); - return (rc); + return (PTL_FAIL); } - - PORTAL_SYMBOL_REGISTER(kscimacnal_ni); - /* We're done now, it's OK for the RX callback to do stuff */ kscimacnal_data.ksci_init = 1; + /* Prevent unload before matching PtlNIFini() */ + PORTAL_MODULE_USE; + + return (PTL_OK); +} + + +/* Called by kernel at module unload time */ +static void /*__exit*/ +kscimacnal_finalize(void) +{ + LASSERT (!kscimacnal_data.ksci_init); + + ptl_unregister_nal(SCIMACNAL); + + CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); + + return; +} + + +/* Called by kernel at module insertion time */ +static int __init +kscimacnal_initialize(void) +{ + int rc; + + CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); + + kscimacnal_api.startup = kscimacnal_startup; + kscimacnal_api.forward = 
kscimacnal_forward; + kscimacnal_api.shutdown = kscimacnal_shutdown; + kscimacnal_api.yield = kscimacnal_yield; + kscimacnal_api.lock= kscimacnal_lock; + kscimacnal_api.unlock= kscimacnal_unlock; + kscimacnal_api.nal_data = &kscimacnal_data; + + rc = ptl_register_nal(SCIMACNAL, &kscimacnal_api); + if (rc != PTL_OK) { + CERROR("Can't register SCIMACNAL: %d\n", rc); + return (-ENODEV); + } + return 0; } diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index d874a6cf58cd1a47faf4e1d4f7441c36d92531f2..32bbbec5262d2597ea91ee5c23c062a65b91a743 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -25,13 +25,10 @@ #include "socknal.h" +nal_t ksocknal_api; +ksock_nal_data_t ksocknal_data; ptl_handle_ni_t ksocknal_ni; -static nal_t ksocknal_api; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -ksock_nal_data_t ksocknal_data; -#else -static ksock_nal_data_t ksocknal_data; -#endif +ksock_tunables_t ksocknal_tunables; kpr_nal_interface_t ksocknal_router_interface = { kprni_nalid: SOCKNAL, @@ -40,6 +37,7 @@ kpr_nal_interface_t ksocknal_router_interface = { kprni_notify: ksocknal_notify, }; +#ifdef CONFIG_SYSCTL #define SOCKNAL_SYSCTL 200 #define SOCKNAL_SYSCTL_TIMEOUT 1 @@ -50,21 +48,21 @@ kpr_nal_interface_t ksocknal_router_interface = { static ctl_table ksocknal_ctl_table[] = { {SOCKNAL_SYSCTL_TIMEOUT, "timeout", - &ksocknal_data.ksnd_io_timeout, sizeof (int), + &ksocknal_tunables.ksnd_io_timeout, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", - &ksocknal_data.ksnd_eager_ack, sizeof (int), + &ksocknal_tunables.ksnd_eager_ack, sizeof (int), 0644, NULL, &proc_dointvec}, #if SOCKNAL_ZC {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", - &ksocknal_data.ksnd_zc_min_frag, sizeof (int), + &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), 0644, NULL, &proc_dointvec}, #endif {SOCKNAL_SYSCTL_TYPED, "typed", - &ksocknal_data.ksnd_typed_conns, sizeof (int), + 
&ksocknal_tunables.ksnd_typed_conns, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", - &ksocknal_data.ksnd_min_bulk, sizeof (int), + &ksocknal_tunables.ksnd_min_bulk, sizeof (int), 0644, NULL, &proc_dointvec}, { 0 } }; @@ -73,6 +71,7 @@ static ctl_table ksocknal_top_ctl_table[] = { {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, { 0 } }; +#endif int ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, @@ -88,12 +87,6 @@ ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, return PTL_OK; } -int -ksocknal_api_shutdown(nal_t *nal, int ni) -{ - return PTL_OK; -} - void ksocknal_api_lock(nal_t *nal, unsigned long *flags) { @@ -154,19 +147,6 @@ ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds) return (milliseconds); } -nal_t * -ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); -} - -/* - * EXTRA functions follow - */ - int ksocknal_set_mynid(ptl_nid_t nid) { @@ -832,7 +812,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; list_add (&conn->ksnc_list, &peer->ksnp_conns); atomic_inc (&conn->ksnc_refcount); @@ -1466,30 +1446,34 @@ ksocknal_free_buffers (void) } void -ksocknal_module_fini (void) +ksocknal_api_shutdown (nal_t *nal) { int i; + if (nal->nal_refct != 0) { + /* This module got the first ref */ + PORTAL_MODULE_UNUSE; + return; + } + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); + LASSERT(nal == &ksocknal_api); + switch (ksocknal_data.ksnd_init) { default: LASSERT (0); case SOCKNAL_INIT_ALL: -#if CONFIG_SYSCTL - if 
(ksocknal_data.ksnd_sysctl != NULL) - unregister_sysctl_table (ksocknal_data.ksnd_sysctl); -#endif - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); + libcfs_nal_cmd_unregister(SOCKNAL); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; /* fall through */ - case SOCKNAL_INIT_PTL: + case SOCKNAL_INIT_LIB: /* No more calls to ksocknal_cmd() to create new * autoroutes/connections since we're being unloaded. */ - PtlNIFini(ksocknal_ni); /* Delete all autoroute entries */ ksocknal_del_route(PTL_NID_ANY, 0, 0, 0); @@ -1510,6 +1494,8 @@ ksocknal_module_fini (void) /* Tell lib we've stopped calling into her. */ lib_fini(&ksocknal_lib); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; /* fall through */ case SOCKNAL_INIT_DATA: @@ -1557,6 +1543,8 @@ ksocknal_module_fini (void) kpr_deregister (&ksocknal_data.ksnd_router); ksocknal_free_buffers(); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; /* fall through */ case SOCKNAL_INIT_NOTHING: @@ -1571,7 +1559,7 @@ ksocknal_module_fini (void) } -void __init +void ksocknal_init_incarnation (void) { struct timeval tv; @@ -1587,42 +1575,31 @@ ksocknal_init_incarnation (void) (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; } -int __init -ksocknal_module_init (void) +int +ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; + ptl_process_id_t process_id; + int pkmem = atomic_read(&portal_kmemory); + int rc; + int i; + int j; - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - /* check ksnr_connected/connecting field large enough */ - LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - LASSERT 
(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); + LASSERT (nal == &ksocknal_api); - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = ksocknal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } - ksocknal_lib.nal_data = &ksocknal_data; + LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; - ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; - ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; - ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; -#if SOCKNAL_ZC - ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; -#endif ksocknal_init_incarnation(); ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; @@ -1669,7 +1646,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1685,15 +1662,19 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); + /* NB we have to wait to be told our true NID... 
*/ + process_id.pid = 0; + process_id.nid = 0; + + rc = lib_init(&ksocknal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { + CERROR("lib_init failed: error %d\n", rc); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PtlNIDebug(ksocknal_ni, ~0); - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called for (i = 0; i < SOCKNAL_N_SCHED; i++) { rc = ksocknal_thread_start (ksocknal_scheduler, @@ -1701,7 +1682,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1710,7 +1691,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1718,7 +1699,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_reaper, NULL); if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } @@ -1728,7 +1709,7 @@ ksocknal_module_init (void) CDEBUG(D_NET, "Can't initialise routing interface " "(rc = %d): not routing\n", rc); } else { - /* Only allocate forwarding buffers if I'm on a gateway */ + /* Only allocate forwarding buffers if there's a router */ for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { @@ -1744,7 +1725,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, fmb_kiov[pool->fmp_buff_pages])); if (fmb == NULL) { - ksocknal_module_fini(); + ksocknal_api_shutdown(&ksocknal_api); return (-ENOMEM); } @@ -1754,7 +1735,7 @@ ksocknal_module_init (void) fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL); if (fmb->fmb_kiov[j].kiov_page == NULL) { - ksocknal_module_fini (); + 
ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1765,19 +1746,13 @@ ksocknal_module_init (void) } } - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); + rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PORTAL_SYMBOL_REGISTER(ksocknal_ni); - -#ifdef CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_data.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0); -#endif /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; @@ -1789,6 +1764,75 @@ ksocknal_module_init (void) return (0); } +void __exit +ksocknal_module_fini (void) +{ +#ifdef CONFIG_SYSCTL + if (ksocknal_tunables.ksnd_sysctl != NULL) + unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); +#endif + PtlNIFini(ksocknal_ni); + + ptl_unregister_nal(SOCKNAL); +} + +int __init +ksocknal_module_init (void) +{ + int rc; + + /* packet descriptor must fit in a router descriptor's scratchpad */ + LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); + /* the following must be sizeof(int) for proc_dointvec() */ + LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); +#if SOCKNAL_ZC + LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); +#endif + /* check ksnr_connected/connecting field large enough */ + LASSERT(SOCKNAL_CONN_NTYPES <= 4); + + ksocknal_api.startup = ksocknal_api_startup; + ksocknal_api.forward = ksocknal_api_forward; + ksocknal_api.shutdown = ksocknal_api_shutdown; + ksocknal_api.lock = ksocknal_api_lock; + ksocknal_api.unlock = ksocknal_api_unlock; + ksocknal_api.nal_data = &ksocknal_data; 
+ + ksocknal_lib.nal_data = &ksocknal_data; + + /* Initialise dynamic tunables to defaults once only */ + ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; + ksocknal_tunables.ksnd_eager_ack = SOCKNAL_EAGER_ACK; + ksocknal_tunables.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; + ksocknal_tunables.ksnd_min_bulk = SOCKNAL_MIN_BULK; +#if SOCKNAL_ZC + ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; +#endif + + rc = ptl_register_nal(SOCKNAL, &ksocknal_api); + if (rc != PTL_OK) { + CERROR("Can't register SOCKNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways want the NAL started up at module load time... */ + rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(SOCKNAL); + return (-ENODEV); + } + +#ifdef CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + ksocknal_tunables.ksnd_sysctl = + register_sysctl_table (ksocknal_top_ctl_table, 0); +#endif + return (0); +} + MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); MODULE_LICENSE("GPL"); @@ -1796,4 +1840,3 @@ MODULE_LICENSE("GPL"); module_init(ksocknal_module_init); module_exit(ksocknal_module_fini); -EXPORT_SYMBOL (ksocknal_ni); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index bd3c1fba1e525698d00d425b4cdd02e39899442d..e1e3aaca7cacc5919d27ac694a95f68097ec693d 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -64,6 +64,7 @@ #include <linux/kpr.h> #include <portals/p30.h> #include <portals/lib-p30.h> +#include <portals/nal.h> #include <portals/socknal.h> #if CONFIG_SMP @@ -141,7 +142,6 @@ typedef struct { } ksock_irqinfo_t; typedef struct { - int ksnd_init; /* initialisation state */ int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */ int ksnd_eager_ack; /* make TCP ack eagerly? 
*/ int ksnd_typed_conns; /* drive sockets by type? */ @@ -150,6 +150,10 @@ typedef struct { unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif struct ctl_table_header *ksnd_sysctl; /* sysctl interface */ +} ksock_tunables_t; + +typedef struct { + int ksnd_init; /* initialisation state */ __u64 ksnd_incarnation; /* my epoch */ rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ @@ -194,7 +198,7 @@ typedef struct { #define SOCKNAL_INIT_NOTHING 0 #define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_PTL 2 +#define SOCKNAL_INIT_LIB 2 #define SOCKNAL_INIT_ALL 3 /* A packet just assembled for transmission is represented by 1 or more @@ -362,6 +366,7 @@ typedef struct ksock_peer extern nal_cb_t ksocknal_lib; extern ksock_nal_data_t ksocknal_data; +extern ksock_tunables_t ksocknal_tunables; static inline struct list_head * ksocknal_nid2peerlist (ptl_nid_t nid) diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c index ebb32da2bdecb9304f854f793387af1e7bcd6aff..861c07dcbee4692d72d43f25420a1592aa659ed2 100644 --- a/lustre/portals/knals/socknal/socknal_cb.c +++ b/lustre/portals/knals/socknal/socknal_cb.c @@ -262,7 +262,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) LASSERT (tx->tx_nkiov > 0); #if SOCKNAL_ZC - if (fragsize >= ksocknal_data.ksnd_zc_min_frag && + if (fragsize >= ksocknal_tunables.ksnd_zc_min_frag && (sock->sk->route_caps & NETIF_F_SG) && (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { @@ -381,7 +381,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) * is set. Instead, we presume peer death has occurred if * the socket doesn't drain within a timout */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; conn->ksnc_peer->ksnp_last_alive = jiffies; } while (tx->tx_resid != 0); @@ -444,7 +444,7 @@ ksocknal_recv_iov (ksock_conn_t *conn) /* received something... 
*/ conn->ksnc_peer->ksnp_last_alive = jiffies; conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -503,7 +503,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) /* received something... */ conn->ksnc_peer->ksnp_last_alive = jiffies; conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -562,7 +562,7 @@ ksocknal_receive (ksock_conn_t *conn) if (conn->ksnc_rx_nob_wanted == 0) { /* Completed a message segment (header or payload) */ - if ((ksocknal_data.ksnd_eager_ack & conn->ksnc_type) != 0 && + if ((ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0 && (conn->ksnc_rx_state == SOCKNAL_RX_BODY || conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) { /* Remind the socket to ack eagerly... */ @@ -723,7 +723,7 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route) LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES); LASSERT (!route->ksnr_connecting); - if (ksocknal_data.ksnd_typed_conns) + if (ksocknal_tunables.ksnd_typed_conns) route->ksnr_connecting = KSNR_TYPED_ROUTES & ~route->ksnr_connected; else @@ -797,7 +797,7 @@ ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) fnob = nob; } - if (!ksocknal_data.ksnd_typed_conns) + if (!ksocknal_tunables.ksnd_typed_conns) continue; switch (c->ksnc_type) { @@ -808,11 +808,11 @@ ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) case SOCKNAL_CONN_BULK_IN: continue; case SOCKNAL_CONN_BULK_OUT: - if (tx->tx_nob < ksocknal_data.ksnd_min_bulk) + if (tx->tx_nob < ksocknal_tunables.ksnd_min_bulk) continue; break; case SOCKNAL_CONN_CONTROL: - if (tx->tx_nob >= ksocknal_data.ksnd_min_bulk) + if (tx->tx_nob >= ksocknal_tunables.ksnd_min_bulk) continue; break; } @@ -856,7 +856,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, 
ksock_conn_t *conn) spin_lock_irqsave (&sched->kss_lock, flags); conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with list_add_tail */ list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); @@ -2182,7 +2182,7 @@ ksocknal_setup_sock (struct socket *sock) /* Keepalives: If 3/4 of the timeout elapses, start probing every * second until the timeout elapses. */ - option = (ksocknal_data.ksnd_io_timeout * 3) / 4; + option = (ksocknal_tunables.ksnd_io_timeout * 3) / 4; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, (char *)&option, sizeof (option)); @@ -2202,7 +2202,7 @@ ksocknal_setup_sock (struct socket *sock) return (rc); } - option = ksocknal_data.ksnd_io_timeout / 4; + option = ksocknal_tunables.ksnd_io_timeout / 4; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, (char *)&option, sizeof (option)); @@ -2259,7 +2259,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) /* Set the socket timeouts, so our connection attempt completes in * finite time */ - tv.tv_sec = ksocknal_data.ksnd_io_timeout; + tv.tv_sec = ksocknal_tunables.ksnd_io_timeout; tv.tv_usec = 0; set_fs (KERNEL_DS); @@ -2268,7 +2268,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) set_fs (oldmm); if (rc != 0) { CERROR ("Can't set send timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); + ksocknal_tunables.ksnd_io_timeout, rc); goto out; } @@ -2278,7 +2278,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) set_fs (oldmm); if (rc != 0) { CERROR ("Can't set receive timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); + ksocknal_tunables.ksnd_io_timeout, rc); goto out; } @@ -2652,9 +2652,9 @@ ksocknal_reaper (void *arg) * timeout on any connection within (n+1)/n times the * timeout interval. 
*/ - if (ksocknal_data.ksnd_io_timeout > n * p) + if (ksocknal_tunables.ksnd_io_timeout > n * p) chunk = (chunk * n * p) / - ksocknal_data.ksnd_io_timeout; + ksocknal_tunables.ksnd_io_timeout; if (chunk == 0) chunk = 1; diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 914b78f9b385441f453ae3a12b6938c25a5caade..4e43aa57292027ffe15fe352a3d9ba2683ce3e04 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -949,19 +949,20 @@ void portals_run_lbug_upcall(char *file, const char *fn, const int line) char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { switch(nal){ -/* XXX this should be a nal method of some sort */ +/* XXX this could be a nal method of some sort, 'cept it's config + * dependent whether (say) socknal NIDs are actually IP addresses... */ #ifndef CRAY_PORTALS case TCPNAL: /* userspace NAL */ case SOCKNAL: - sprintf(str, "%u:%d.%d.%d.%d", (__u32)(nid >> 32), - HIPQUAD(nid)); + snprintf(str, PTL_NALFMT_SIZE-1, + "%u:%d.%d.%d.%d", (__u32)(nid >> 32), HIPQUAD(nid)); break; case QSWNAL: case GMNAL: case IBNAL: case SCIMACNAL: - sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid); + snprintf(str, PTL_NALFMT_SIZE-1, LPD64, nid); break; #endif default: diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c index 9daa8e0b1fdca7848f08125f2ecc1db4264d813b..a53ea6b41e8bbec1b84ee8090f9e0cb7a91975e7 100644 --- a/lustre/portals/libcfs/module.c +++ b/lustre/portals/libcfs/module.c @@ -51,7 +51,13 @@ #define PORTAL_MINOR 240 -extern void (kping_client)(struct portal_ioctl_data *); +struct nal_cmd_handler { + nal_cmd_handler_fn *nch_handler; + void *nch_private; +}; + +static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; +static DECLARE_MUTEX(nal_cmd_sem); #ifdef PORTAL_DEBUG void kportal_assertion_failed(char *expr, char *file, const char *func, @@ -239,6 +245,62 @@ static inline void freedata(void *data, int len) PORTAL_FREE(data, len); } +int +libcfs_nal_cmd_register(int nal, 
nal_cmd_handler_fn *handler, void *private) +{ + int rc = 0; + + CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); + + if (nal > 0 && nal <= NAL_MAX_NR) { + down(&nal_cmd_sem); + if (nal_cmd[nal].nch_handler != NULL) + rc = -EBUSY; + else { + nal_cmd[nal].nch_handler = handler; + nal_cmd[nal].nch_private = private; + } + up(&nal_cmd_sem); + } + return rc; +} +EXPORT_SYMBOL(libcfs_nal_cmd_register); + +void +libcfs_nal_cmd_unregister(int nal) +{ + CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); + + LASSERT(nal > 0 && nal <= NAL_MAX_NR); + LASSERT(nal_cmd[nal].nch_handler != NULL); + + down(&nal_cmd_sem); + nal_cmd[nal].nch_handler = NULL; + nal_cmd[nal].nch_private = NULL; + up(&nal_cmd_sem); +} +EXPORT_SYMBOL(libcfs_nal_cmd_unregister); + +int +libcfs_nal_cmd(struct portals_cfg *pcfg) +{ + __u32 nal = pcfg->pcfg_nal; + int rc = -EINVAL; + ENTRY; + + down(&nal_cmd_sem); + if (nal > 0 && nal <= NAL_MAX_NR && + nal_cmd[nal].nch_handler != NULL) { + CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, + pcfg->pcfg_command); + rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); + } + up(&nal_cmd_sem); + + RETURN(rc); +} +EXPORT_SYMBOL(libcfs_nal_cmd); + static DECLARE_RWSEM(ioctl_list_sem); static LIST_HEAD(ioctl_list); @@ -356,6 +418,27 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, err = -EFAULT; break; #endif + case IOC_PORTAL_NAL_CMD: { + struct portals_cfg pcfg; + + LASSERT (data->ioc_plen1 == sizeof(pcfg)); + if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1, + sizeof(pcfg))) { + err = -EFAULT; + break; + } + + CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, + pcfg.pcfg_command); + err = libcfs_nal_cmd(&pcfg); + + if (err == 0 && + copy_to_user((char *)data->ioc_pbuf1, &pcfg, + sizeof (pcfg))) + err = -EFAULT; + break; + } + case IOC_PORTAL_MEMHOG: if (!capable (CAP_SYS_ADMIN)) err = -EPERM; diff --git a/lustre/portals/portals/api-eq.c b/lustre/portals/portals/api-eq.c index 
390156a3be065ca80d80ca2bb53e3e7b23917dbc..7fc95fabf62e689165791ee6a85fdd42c54a9097 100644 --- a/lustre/portals/portals/api-eq.c +++ b/lustre/portals/portals/api-eq.c @@ -25,28 +25,6 @@ #include <portals/api-support.h> -int ptl_eq_init(void) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_fini(void) -{ - /* Nothing to do anymore... */ -} - -int ptl_eq_ni_init(nal_t * nal) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_ni_fini(nal_t * nal) -{ - /* Nothing to do anymore... */ -} - int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev) { int new_index = eq->sequence & (eq->size - 1); diff --git a/lustre/portals/portals/api-errno.c b/lustre/portals/portals/api-errno.c index 0e155daa965e63d29e33cf7820ed7ac37004ba4a..1c01c88f9c08c90785de3926cd3bc363ab1a641c 100644 --- a/lustre/portals/portals/api-errno.c +++ b/lustre/portals/portals/api-errno.c @@ -36,8 +36,7 @@ const char *ptl_err_str[] = { "PTL_MD_NO_UPDATE", "PTL_FAIL", - "PTL_IOV_TOO_MANY", - "PTL_IOV_TOO_SMALL", + "PTL_IOV_INVALID", "PTL_EQ_IN_USE", diff --git a/lustre/portals/portals/api-init.c b/lustre/portals/portals/api-init.c index e41bad8668b37ec99108fbf3b2fab442d5c7a27a..08d615d8a23c0155f50a436de25bdd97311951bb 100644 --- a/lustre/portals/portals/api-init.c +++ b/lustre/portals/portals/api-init.c @@ -25,41 +25,20 @@ #include <portals/api-support.h> -int ptl_init; - -int __p30_initialized; -int __p30_myr_initialized; -int __p30_ip_initialized; -ptl_handle_ni_t __myr_ni_handle; -ptl_handle_ni_t __ip_ni_handle; - int PtlInit(int *max_interfaces) { if (max_interfaces != NULL) - *max_interfaces = NAL_ENUM_END_MARKER; - - if (ptl_init) - return PTL_OK; + *max_interfaces = NAL_MAX_NR; LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO")); - ptl_ni_init(); - ptl_me_init(); - ptl_eq_init(); - ptl_init = 1; - - return PTL_OK; + return ptl_ni_init(); } void PtlFini(void) { - - /* Reverse order of initialization */ - ptl_eq_fini(); - ptl_me_fini(); ptl_ni_fini(); - 
ptl_init = 0; } diff --git a/lustre/portals/portals/api-me.c b/lustre/portals/portals/api-me.c index e724e5859307e2627908349b5c8f59903e5a1513..219aa5e08191ffb70e53894e8331b01a0138b1ff 100644 --- a/lustre/portals/portals/api-me.c +++ b/lustre/portals/portals/api-me.c @@ -25,18 +25,3 @@ #include <portals/api-support.h> -int ptl_me_init(void) -{ - return PTL_OK; -} -void ptl_me_fini(void) -{ /* Nothing to do */ -} -int ptl_me_ni_init(nal_t * nal) -{ - return PTL_OK; -} - -void ptl_me_ni_fini(nal_t * nal) -{ /* Nothing to do... */ -} diff --git a/lustre/portals/portals/api-ni.c b/lustre/portals/portals/api-ni.c index 02082c678b93a605fd9c85662bf1e9a3c323f231..81afd0a5ee70af6b653804dda6bdb70837204e60 100644 --- a/lustre/portals/portals/api-ni.c +++ b/lustre/portals/portals/api-ni.c @@ -25,13 +25,36 @@ #include <portals/api-support.h> +int ptl_init; + /* Put some magic in the NI handle so uninitialised/zeroed handles are easy * to spot */ #define NI_HANDLE_MAGIC 0xebc0de00 #define NI_HANDLE_MASK 0x000000ff -#define MAX_NIS 8 -static nal_t *ptl_interfaces[MAX_NIS]; -int ptl_num_interfaces = 0; + +static struct nal_t *ptl_nal_table[NAL_MAX_NR]; + +#ifdef __KERNEL__ +DECLARE_MUTEX(ptl_mutex); + +static void ptl_mutex_enter (void) +{ + down (&ptl_mutex); +} + +static void ptl_mutex_exit (void) +{ + up (&ptl_mutex); +} +#else +static void ptl_mutex_enter (void) +{ +} + +static void ptl_mutex_exit (void) +{ +} +#endif nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) { @@ -46,147 +69,188 @@ nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) return NULL; idx &= NI_HANDLE_MASK; - if (idx < MAX_NIS) - return ptl_interfaces[idx]; + + if (idx >= NAL_MAX_NR || + ptl_nal_table[idx] == NULL || + ptl_nal_table[idx]->nal_refct == 0) + return NULL; - return NULL; + return ptl_nal_table[idx]; } -int ptl_ni_init(void) +int ptl_register_nal (ptl_interface_t interface, nal_t *nal) { - int i; - - LASSERT (MAX_NIS <= (NI_HANDLE_MASK + 1)); + int rc; - for (i = 0; i < MAX_NIS; i++) - ptl_interfaces[i] = 
NULL; + ptl_mutex_enter(); + + if (interface < 0 || interface >= NAL_MAX_NR) + rc = PTL_IFACE_INVALID; + else if (ptl_nal_table[interface] != NULL) + rc = PTL_IFACE_DUP; + else { + rc = PTL_OK; + ptl_nal_table[interface] = nal; + LASSERT(nal->nal_refct == 0); + } - return PTL_OK; + ptl_mutex_exit(); + return (rc); } -void ptl_ni_fini(void) +void ptl_unregister_nal (ptl_interface_t interface) { - int i; - - for (i = 0; i < MAX_NIS; i++) { - nal_t *nal = ptl_interfaces[i]; - if (!nal) - continue; + LASSERT(interface >= 0 && interface < NAL_MAX_NR); + LASSERT(ptl_nal_table[interface] != NULL); + LASSERT(ptl_nal_table[interface]->nal_refct == 0); + + ptl_mutex_enter(); + + ptl_nal_table[interface] = NULL; - if (nal->shutdown) - nal->shutdown(nal, i); - } + ptl_mutex_exit(); } -#ifdef __KERNEL__ -DECLARE_MUTEX(ptl_ni_init_mutex); - -static void ptl_ni_init_mutex_enter (void) +int ptl_ni_init(void) { - down (&ptl_ni_init_mutex); -} + /* If this assertion fails, we need more bits in NI_HANDLE_MASK and + * to shift NI_HANDLE_MAGIC left appropriately */ + LASSERT (NAL_MAX_NR <= (NI_HANDLE_MASK + 1)); + + ptl_mutex_enter(); + + if (!ptl_init) { + /* NULL pointers, clear flags */ + memset(ptl_nal_table, 0, sizeof(ptl_nal_table)); +#ifndef __KERNEL__ + /* Kernel NALs register themselves when their module loads, + * and unregister themselves when their module is unloaded. + * Userspace NALs, are plugged in explicitly here... */ + { + extern nal_t procapi_nal; + + /* XXX pretend it's socknal to keep liblustre happy... 
*/ + ptl_nal_table[SOCKNAL] = &procapi_nal; + LASSERT (procapi_nal.nal_refct == 0); + } +#endif + ptl_init = 1; + } -static void ptl_ni_init_mutex_exit (void) -{ - up (&ptl_ni_init_mutex); + ptl_mutex_exit(); + + return PTL_OK; } -#else -static void ptl_ni_init_mutex_enter (void) +void ptl_ni_fini(void) { -} + nal_t *nal; + int i; + + ptl_mutex_enter(); + + if (ptl_init) { + for (i = 0; i < NAL_MAX_NR; i++) { + + nal = ptl_nal_table[i]; + if (nal == NULL) + continue; + + if (nal->nal_refct != 0) { + CWARN("NAL %d has outstanding refcount %d\n", + i, nal->nal_refct); + nal->shutdown(nal); + } + + ptl_nal_table[i] = NULL; + } -static void ptl_ni_init_mutex_exit (void) -{ + ptl_init = 0; + } + + ptl_mutex_exit(); } -#endif - -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, ptl_pid_t requested_pid, - ptl_handle_ni_t * handle) +int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, + ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *handle) { nal_t *nal; - int i; + int i; + int rc; if (!ptl_init) return PTL_NO_INIT; - ptl_ni_init_mutex_enter (); + ptl_mutex_enter (); - nal = interface(ptl_num_interfaces, ptl_size, acl_size, requested_pid); - - if (!nal) { - ptl_ni_init_mutex_exit (); - return PTL_NAL_FAILED; + if (interface == PTL_IFACE_DEFAULT) { + for (i = 0; i < NAL_MAX_NR; i++) + if (ptl_nal_table[i] != NULL) { + interface = i; + break; + } + /* NB if no interfaces are registered, 'interface' will + * fail the valid test below */ } - - for (i = 0; i < ptl_num_interfaces; i++) { - if (ptl_interfaces[i] == nal) { - nal->refct++; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | i; - CDEBUG(D_OTHER, "Returning existing NAL (%d)\n", i); - ptl_ni_init_mutex_exit (); - return PTL_OK; - } + + if (interface < 0 || + interface >= NAL_MAX_NR || + ptl_nal_table[interface] == NULL) { + GOTO(out, rc = PTL_IFACE_INVALID); } - nal->refct = 1; - if (ptl_num_interfaces >= MAX_NIS) { - 
if (nal->shutdown) - nal->shutdown (nal, ptl_num_interfaces); - ptl_ni_init_mutex_exit (); - return PTL_NO_SPACE; - } + nal = ptl_nal_table[interface]; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces; - ptl_interfaces[ptl_num_interfaces++] = nal; + CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct); + rc = nal->startup(nal, requested_pid, desired_limits, actual_limits); - ptl_eq_ni_init(nal); - ptl_me_ni_init(nal); + if (rc != PTL_OK) { + CERROR("Error %d starting up NAL %d, refs %d\n", rc, + interface, nal->nal_refct); + GOTO(out, rc); + } + + if (nal->nal_refct != 0) { + /* Caller gets to know if this was the first ref or not */ + rc = PTL_IFACE_DUP; + } + + nal->nal_refct++; + handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface; - ptl_ni_init_mutex_exit (); - return PTL_OK; + out: + ptl_mutex_exit (); + return rc; } - int PtlNIFini(ptl_handle_ni_t ni) { nal_t *nal; - int idx; - int rc; + int idx; if (!ptl_init) return PTL_NO_INIT; - ptl_ni_init_mutex_enter (); + ptl_mutex_enter (); nal = ptl_hndl2nal (&ni); if (nal == NULL) { - ptl_ni_init_mutex_exit (); + ptl_mutex_exit (); return PTL_HANDLE_INVALID; } idx = ni.nal_idx & NI_HANDLE_MASK; - nal->refct--; - if (nal->refct > 0) { - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - - ptl_me_ni_fini(nal); - ptl_eq_ni_fini(nal); + LASSERT(nal->nal_refct > 0); - rc = PTL_OK; - if (nal->shutdown) - rc = nal->shutdown(nal, idx); + nal->nal_refct--; - ptl_interfaces[idx] = NULL; - ptl_num_interfaces--; + /* nal_refct == 0 tells nal->shutdown to really shut down */ + nal->shutdown(nal); - ptl_ni_init_mutex_exit (); - return rc; + ptl_mutex_exit (); + return PTL_OK; } int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out) diff --git a/lustre/portals/portals/api-wrap.c b/lustre/portals/portals/api-wrap.c index 9c82c30cd679ffa98d4b54ee69ca0b9735959604..3e6f9ce62f65c0928456fbaae9df3742e4fd75d1 100644 --- a/lustre/portals/portals/api-wrap.c +++ 
b/lustre/portals/portals/api-wrap.c @@ -124,25 +124,6 @@ int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, return ret.rc; } - - -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in) -{ - PtlNIDebug_in args; - PtlNIDebug_out ret; - int rc; - - args.mask_in = mask_in; - - rc = do_forward(ni, PTL_NIDEBUG, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; -} - int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, @@ -255,45 +236,6 @@ int PtlMEDump(ptl_handle_me_t current_in) return ret.rc; } -static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in) -{ - nal_t *nal; - int rc; - int i; - - if (!ptl_init) { - CERROR("PtlMDAttach/Bind/Update: Not initialized\n"); - return PTL_NO_INIT; - } - - nal = ptl_hndl2nal(&current_in); - if (!nal) - return PTL_HANDLE_INVALID; - - if (nal->validate != NULL) /* nal->validate not a NOOP */ - { - if ((md_in.options & PTL_MD_IOVEC) == 0) /* contiguous */ - { - rc = nal->validate (nal, md_in.start, md_in.length); - if (rc) - return (PTL_SEGV); - } - else - { - struct iovec *iov = (struct iovec *)md_in.start; - - for (i = 0; i < md_in.niov; i++, iov++) - { - rc = nal->validate (nal, iov->iov_base, iov->iov_len); - if (rc) - return (PTL_SEGV); - } - } - } - - return 0; -} - static ptl_handle_eq_t md2eq (ptl_md_t *md) { if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE)) @@ -310,16 +252,13 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, PtlMDAttach_out ret; int rc; - rc = validate_md(me_in, md_in); - if (rc == PTL_OK) { - args.eq_in = md2eq(&md_in); - args.me_in = me_in; - args.md_in = md_in; - args.unlink_in = unlink_in; + args.eq_in = md2eq(&md_in); + args.me_in = me_in; + args.md_in = md_in; + args.unlink_in = unlink_in; - rc = 
do_forward(me_in, PTL_MDATTACH, + &args, sizeof(args), &ret, sizeof(ret)); if (rc != PTL_OK) return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc; @@ -340,10 +279,6 @@ int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, PtlMDBind_out ret; int rc; - rc = validate_md(ni_in, md_in); - if (rc != PTL_OK) - return rc; - args.eq_in = md2eq(&md_in); args.ni_in = ni_in; args.md_in = md_in; @@ -378,9 +313,6 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, args.old_inout_valid = 0; if (new_inout) { - rc = validate_md (md_in, *new_inout); - if (rc != PTL_OK) - return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc; args.new_inout = *new_inout; args.new_inout_valid = 1; } else @@ -423,7 +355,7 @@ int PtlMDUnlink(ptl_handle_md_t md_in) } int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, - int (*callback) (ptl_event_t * event), + ptl_eq_handler_t callback, ptl_handle_eq_t * handle_out) { ptl_eq_t *eq = NULL; @@ -458,12 +390,6 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, for (i = 0; i < count; i++) ev[i].sequence = 0; - if (nal->validate != NULL) { - rc = nal->validate(nal, ev, count * sizeof(ptl_event_t)); - if (rc != PTL_OK) - goto fail; - } - args.ni_in = interface; args.count_in = count; args.base_in = ev; diff --git a/lustre/portals/portals/lib-dispatch.c b/lustre/portals/portals/lib-dispatch.c index 13036c7bc537c82a317dfc2c00571e8c63f25f4a..798e1173357d18643b4582df2234997209b3a20f 100644 --- a/lustre/portals/portals/lib-dispatch.c +++ b/lustre/portals/portals/lib-dispatch.c @@ -35,7 +35,6 @@ static dispatch_table_t dispatch_table[] = { [PTL_GETID] {do_PtlGetId, "PtlGetId"}, [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"}, [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"}, - [PTL_NIDEBUG] {do_PtlNIDebug, "PtlNIDebug"}, [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"}, [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"}, [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"}, diff --git a/lustre/portals/portals/lib-init.c 
b/lustre/portals/portals/lib-init.c index 61ef465d061ac0396b519094d51baf2f50fd4cfa..c62dbc2d5bd751730d795a294f97d988d49afea5 100644 --- a/lustre/portals/portals/lib-init.c +++ b/lustre/portals/portals/lib-init.c @@ -41,8 +41,15 @@ #ifndef PTL_USE_LIB_FREELIST int -kportal_descriptor_setup (nal_cb_t *nal) +kportal_descriptor_setup (nal_cb_t *nal, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { + /* Ignore requested limits! */ + actual_limits->max_mes = INT_MAX; + actual_limits->max_mds = INT_MAX; + actual_limits->max_eqs = INT_MAX; + return PTL_OK; } @@ -100,7 +107,9 @@ lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl) } int -kportal_descriptor_setup (nal_cb_t *nal) +kportal_descriptor_setup (nal_cb_t *nal, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { /* NB on failure caller must still call kportal_descriptor_cleanup */ /* ****** */ @@ -111,6 +120,13 @@ kportal_descriptor_setup (nal_cb_t *nal) memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds)); memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs)); + /* Ignore requested limits! */ + actual_limits->max_mes = MAX_MES; + actual_limits->max_mds = MAX_MDS; + actual_limits->max_eqs = MAX_EQS; + /* Hahahah what a load of bollocks. 
There's nowhere to + * specify the max # messages in-flight */ + rc = lib_freelist_init (nal, &nal->ni.ni_free_mes, MAX_MES, sizeof (lib_me_t)); if (rc != PTL_OK) @@ -248,21 +264,18 @@ lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh) } int -lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size) +lib_init(nal_cb_t *nal, ptl_process_id_t process_id, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { int rc = PTL_OK; lib_ni_t *ni = &nal->ni; + int ptl_size; int i; ENTRY; /* NB serialised in PtlNIInit() */ - if (ni->refcnt != 0) { /* already initialised */ - ni->refcnt++; - goto out; - } - lib_assert_wire_constants (); /* @@ -271,7 +284,8 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, */ memset(&ni->counters, 0, sizeof(lib_counters_t)); - rc = kportal_descriptor_setup (nal); + rc = kportal_descriptor_setup (nal, requested_limits, + &ni->actual_limits); if (rc != PTL_OK) goto out; @@ -287,12 +301,15 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, if (rc != PTL_OK) goto out; - ni->nid = nid; - ni->pid = pid; + ni->nid = process_id.nid; + ni->pid = process_id.pid; - ni->num_nodes = gsize; - ni->tbl.size = ptl_size; + if (requested_limits != NULL) + ptl_size = requested_limits->max_pt_index + 1; + else + ptl_size = 64; + ni->tbl.size = ptl_size; ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size); if (ni->tbl.tbl == NULL) { rc = PTL_NO_SPACE; @@ -302,9 +319,20 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, for (i = 0; i < ptl_size; i++) INIT_LIST_HEAD(&(ni->tbl.tbl[i])); - ni->debug = PTL_DEBUG_NONE; - ni->up = 1; - ni->refcnt++; + /* max_{mes,mds,eqs} set in kportal_descriptor_setup */ + + /* We don't have an access control table! 
*/ + ni->actual_limits.max_ac_index = -1; + + ni->actual_limits.max_pt_index = ptl_size - 1; + ni->actual_limits.max_md_iovecs = PTL_MD_MAX_IOV; + ni->actual_limits.max_me_list = INT_MAX; + + /* We don't support PtlGetPut! */ + ni->actual_limits.max_getput_md = 0; + + if (actual_limits != NULL) + *actual_limits = ni->actual_limits; out: if (rc != PTL_OK) { @@ -321,12 +349,7 @@ lib_fini(nal_cb_t * nal) lib_ni_t *ni = &nal->ni; int idx; - ni->refcnt--; - - if (ni->refcnt != 0) - goto out; - - /* NB no stat_lock() since this is the last reference. The NAL + /* NB no state_lock() since this is the last reference. The NAL * should have shut down already, so it should be safe to unlink * and free all descriptors, even those that appear committed to a * network op (eg MD with non-zero pending count) @@ -370,11 +393,9 @@ lib_fini(nal_cb_t * nal) } nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size); - ni->up = 0; lib_cleanup_handle_hash (nal); kportal_descriptor_cleanup (nal); - out: return (PTL_OK); } diff --git a/lustre/portals/portals/lib-md.c b/lustre/portals/portals/lib-md.c index 9a391cde5c94e43ec2599d62787977398ffb98ff..64a55b93556d6b2e4dc0649db5bc696b3940ba0f 100644 --- a/lustre/portals/portals/lib-md.c +++ b/lustre/portals/portals/lib-md.c @@ -86,6 +86,7 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, lib_eq_t *eq = NULL; int rc; int i; + int niov; /* NB we are passed an allocated, but uninitialised/active md. * if we return success, caller may lib_md_unlink() it. @@ -101,7 +102,7 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, /* Must check this _before_ allocation. Also, note that non-iov * MDs must set md_niov to 0. */ LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 || - md->niov <= PTL_MD_MAX_IOV); + md->length <= PTL_MD_MAX_IOV); /* This implementation doesn't know how to create START events or * disable END events. 
Best to LASSERT our caller is compliant so @@ -116,7 +117,6 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, new->me = NULL; new->start = md->start; - new->length = md->length; new->offset = 0; new->max_size = md->max_size; new->options = md->options; @@ -132,13 +132,13 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) + niov * sizeof (new->md_iov.iov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the base address on trust */ if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ return PTL_VAL_FAILED; @@ -146,11 +146,10 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.iov[i].iov_len; } - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - + new->length = total_length; + if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -166,13 +165,13 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, nal->cb_recv_pages == NULL) return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) + niov * sizeof (new->md_iov.kiov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the page pointer on trust */ if (new->md_iov.kiov[i].kiov_offset + new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) @@ -181,23 +180,23 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.kiov[i].kiov_len; } - if (md->length > 
total_length) - return PTL_IOV_TOO_SMALL; + new->length = total_length; if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, + rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov, &new->md_addrkey); if (rc != PTL_OK) return (rc); } #endif } else { /* contiguous */ - new->md_niov = 1; + new->length = md->length; + new->md_niov = niov = 1; new->md_iov.iov[0].iov_base = md->start; new->md_iov.iov[0].iov_len = md->length; if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -223,13 +222,13 @@ void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) * and that's all. */ new->start = md->start; - new->length = md->length; + new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? + md->length : md->md_niov; new->threshold = md->threshold; new->max_size = md->max_size; new->options = md->options; new->user_ptr = md->user_ptr; ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 
0 : md->md_niov; } int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) @@ -251,8 +250,8 @@ int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) unsigned long flags; if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) @@ -303,8 +302,8 @@ int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) unsigned long flags; if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) @@ -407,23 +406,16 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, goto out; } - /* XXX fttb, the new MD must be the same type wrt fragmentation */ - if (((new->options ^ md->options) & - (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) { - ret->rc = PTL_MD_INVALID; - goto out; - } - - if (new->niov > md->md_niov) { - ret->rc = PTL_IOV_TOO_MANY; + /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, + * since we simply overwrite the old lib-md */ + if ((((new->options ^ md->options) & + (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || + ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && + new->length != md->md_niov)) { + ret->rc = PTL_IOV_INVALID; goto out; } - if (new->niov < md->md_niov) { - ret->rc = PTL_IOV_TOO_SMALL; - goto out; - } - if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) { test_eq = ptl_handle2eq(&args->testq_in, nal); if (test_eq == NULL) { diff --git a/lustre/portals/portals/lib-me.c b/lustre/portals/portals/lib-me.c index 
e3c46ea154b55cf0c075423e28f935867f1989cd..271fc820c83a0dc95f7da912c6b5c8c622cc65f7 100644 --- a/lustre/portals/portals/lib-me.c +++ b/lustre/portals/portals/lib-me.c @@ -146,13 +146,6 @@ int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) /* call with state_lock please */ void lib_me_unlink(nal_cb_t *nal, lib_me_t *me) { - lib_ni_t *ni = &nal->ni; - - if (ni->debug & PTL_DEBUG_UNLINK) { - ptl_handle_any_t handle; - ptl_me2handle(&handle, me); - } - list_del (&me->me_list); if (me->md) { diff --git a/lustre/portals/portals/lib-msg.c b/lustre/portals/portals/lib-msg.c index 869c9d693f330da14abee12f30ddc9c172222748..1b69533f09e9161d715a130a1e26abe30ebfd44f 100644 --- a/lustre/portals/portals/lib-msg.c +++ b/lustre/portals/portals/lib-msg.c @@ -89,10 +89,6 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) int rc; ptl_hdr_t ack; - /* ni went down while processing this message */ - if (nal->ni.up == 0) - return; - if (msg == NULL) return; diff --git a/lustre/portals/portals/lib-ni.c b/lustre/portals/portals/lib-ni.c index 296bc4a2ac3b1d9846699f161a668ded9d9ffe35..aa959fcf88c607e362b3ce32959b8f60ae7fcbe2 100644 --- a/lustre/portals/portals/lib-ni.c +++ b/lustre/portals/portals/lib-ni.c @@ -29,18 +29,6 @@ #define MAX_DIST 18446744073709551615ULL -int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlNIDebug_in *args = v_args; - PtlNIDebug_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->rc = ni->debug; - ni->debug = args->mask_in; - - return 0; -} - int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret) { /* diff --git a/lustre/portals/portals/module.c b/lustre/portals/portals/module.c index 012d3d91083ae236a21cbebbff88083c129be0ad..40e9da4369fe1c708723fe233a0fa61a3615c085 100644 --- a/lustre/portals/portals/module.c +++ b/lustre/portals/portals/module.c @@ -46,273 +46,17 @@ #include <portals/lib-p30.h> #include <portals/p30.h> +#include <portals/nal.h> #include 
<linux/kp30.h> #include <linux/kpr.h> #include <linux/portals_compat25.h> extern void (kping_client)(struct portal_ioctl_data *); -struct nal_cmd_handler { - nal_cmd_handler_t nch_handler; - void * nch_private; -}; - -static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; -static DECLARE_MUTEX(nal_cmd_sem); - - -static int -kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_del_route(int gw_nalid, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when) -{ - int rc; - kpr_control_interface_t *ci; - - /* No error if router not preset. Sysadmin is allowed to notify - * _everywhere_ when a NID boots or crashes, even if they know - * nothing of the peer. 
*/ - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (0); - - rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp, - ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep) -{ - int gateway_nalid; - ptl_nid_t gateway_nid; - ptl_nid_t lo_nid; - ptl_nid_t hi_nid; - int alive; - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, - &lo_nid, &hi_nid, &alive); - - if (rc == 0) { - CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n", - index, gateway_nalid, gateway_nid, lo_nid, hi_nid, - alive ? "up" : "down"); - - *gateway_nalidp = (__u32)gateway_nalid; - *gateway_nidp = gateway_nid; - *lo_nidp = lo_nid; - *hi_nidp = hi_nid; - *alivep = alive; - } - - PORTAL_SYMBOL_PUT (kpr_control_interface); - return (rc); -} - -static int -kportal_router_cmd(struct portals_cfg *pcfg, void * private) -{ - int err = -EINVAL; - ENTRY; - - switch(pcfg->pcfg_command) { - default: - CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); - break; - - case NAL_CMD_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_NOTIFY_ROUTER: { - CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", - 
pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags ? "Enabling" : "Disabling", - (time_t)pcfg->pcfg_nid3); - - err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags, - (time_t)pcfg->pcfg_nid3); - break; - } - - case NAL_CMD_GET_ROUTE: - CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); - err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, - &pcfg->pcfg_nid, - &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, - &pcfg->pcfg_flags); - break; - } - RETURN(err); -} - -int -kportal_nal_cmd(struct portals_cfg *pcfg) -{ - __u32 nal = pcfg->pcfg_nal; - int rc = -EINVAL; - - ENTRY; - - down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, - pcfg->pcfg_command); - rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); - } - up(&nal_cmd_sem); - RETURN(rc); -} - -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - return (PORTAL_SYMBOL_GET(kqswnal_ni)); - case SOCKNAL: - return (PORTAL_SYMBOL_GET(ksocknal_ni)); - case GMNAL: - return (PORTAL_SYMBOL_GET(kgmnal_ni)); - case IBNAL: - return (PORTAL_SYMBOL_GET(kibnal_ni)); - case TCPNAL: - /* userspace NAL */ - return (NULL); - case SCIMACNAL: - return (PORTAL_SYMBOL_GET(kscimacnal_ni)); - default: - /* A warning to a naive caller */ - CERROR ("unknown nal: %d\n", nal); - return (NULL); - } -} - -void -kportal_put_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - PORTAL_SYMBOL_PUT(kqswnal_ni); - break; - case SOCKNAL: - PORTAL_SYMBOL_PUT(ksocknal_ni); - break; - case GMNAL: - PORTAL_SYMBOL_PUT(kgmnal_ni); - break; - case IBNAL: - PORTAL_SYMBOL_PUT(kibnal_ni); - break; - case TCPNAL: - /* A lesson to a malicious caller */ - LBUG (); - case SCIMACNAL: - PORTAL_SYMBOL_PUT(kscimacnal_ni); - break; - default: - CERROR ("unknown nal: %d\n", nal); - } -} - -int -kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private) -{ - int rc = 0; - - 
CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - if (nal_cmd[nal].nch_handler != NULL) - rc = -EBUSY; - else { - nal_cmd[nal].nch_handler = handler; - nal_cmd[nal].nch_private = private; - } - up(&nal_cmd_sem); - } - return rc; -} - -int -kportal_nal_unregister(int nal) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - nal_cmd[nal].nch_handler = NULL; - nal_cmd[nal].nch_private = NULL; - up(&nal_cmd_sem); - } - return rc; -} - static int kportal_ioctl(struct portal_ioctl_data *data, unsigned int cmd, unsigned long arg) { - int err = 0; + int err; char str[PTL_NALFMT_SIZE]; ENTRY; @@ -334,68 +78,53 @@ static int kportal_ioctl(struct portal_ioctl_data *data, } case IOC_PORTAL_GET_NID: { - const ptl_handle_ni_t *nip; - ptl_process_id_t pid; + ptl_handle_ni_t nih; + ptl_process_id_t pid; CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) + err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + if (!(err == PTL_OK || err == PTL_IFACE_DUP)) RETURN (-EINVAL); - err = PtlGetId (*nip, &pid); + err = PtlGetId (nih, &pid); LASSERT (err == PTL_OK); - kportal_put_ni (data->ioc_nal); + + PtlNIFini(nih); data->ioc_nid = pid.nid; if (copy_to_user ((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; + RETURN (-EFAULT); + RETURN(0); } - case IOC_PORTAL_NAL_CMD: { - struct portals_cfg pcfg; - - LASSERT (data->ioc_plen1 == sizeof(pcfg)); - err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1, - sizeof(pcfg)); - if ( err ) { - EXIT; - return err; - } - - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, - pcfg.pcfg_command); - err = kportal_nal_cmd(&pcfg); - if (err == 0) { - if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, - sizeof (pcfg))) - err = -EFAULT; - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - } - 
break; - } case IOC_PORTAL_FAIL_NID: { - const ptl_handle_ni_t *nip; + ptl_handle_ni_t nih; CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) + err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); - err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count); - kportal_put_ni (data->ioc_nal); - break; + if (err == PTL_OK) { + /* There's no point in failing an interface that + * came into existence just for this */ + err = -EINVAL; + } else { + err = PtlFailNid (nih, data->ioc_nid, data->ioc_count); + if (err != PTL_OK) + err = -EINVAL; + } + + PtlNIFini(nih); + RETURN (err); } default: - err = -EINVAL; - break; + RETURN(-EINVAL); } - - RETURN(err); + /* Not Reached */ } DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl); @@ -411,30 +140,24 @@ static int init_kportals_module(void) RETURN(rc); } - rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL); - if (rc) { - PtlFini(); - CERROR("kportal_nal_registre: ROUTER error %d\n", rc); - } - - if (rc == 0) - libcfs_register_ioctl(&kportal_ioctl_handler); + rc = libcfs_register_ioctl(&kportal_ioctl_handler); + LASSERT (rc == 0); RETURN(rc); } static void exit_kportals_module(void) { - libcfs_deregister_ioctl(&kportal_ioctl_handler); - kportal_nal_unregister(ROUTER); + int rc; + + rc = libcfs_deregister_ioctl(&kportal_ioctl_handler); + LASSERT (rc == 0); + PtlFini(); } -EXPORT_SYMBOL(kportal_nal_register); -EXPORT_SYMBOL(kportal_nal_unregister); -EXPORT_SYMBOL(kportal_get_ni); -EXPORT_SYMBOL(kportal_put_ni); -EXPORT_SYMBOL(kportal_nal_cmd); +EXPORT_SYMBOL(ptl_register_nal); +EXPORT_SYMBOL(ptl_unregister_nal); EXPORT_SYMBOL(ptl_err_str); EXPORT_SYMBOL(lib_dispatch); @@ -446,7 +169,6 @@ EXPORT_SYMBOL(PtlMDAttach); EXPORT_SYMBOL(PtlMDUnlink); EXPORT_SYMBOL(PtlNIInit); EXPORT_SYMBOL(PtlNIFini); -EXPORT_SYMBOL(PtlNIDebug); 
EXPORT_SYMBOL(PtlInit); EXPORT_SYMBOL(PtlFini); EXPORT_SYMBOL(PtlSnprintHandle); diff --git a/lustre/portals/router/router.c b/lustre/portals/router/router.c index 27aab67a9769749f891078a104246ee377317874..9fb6afef4c1ff2aae0cc242e95c656b09cf79895 100644 --- a/lustre/portals/router/router.c +++ b/lustre/portals/router/router.c @@ -48,13 +48,6 @@ kpr_router_interface_t kpr_router_interface = { kprri_deregister: kpr_deregister_nal, }; -kpr_control_interface_t kpr_control_interface = { - kprci_add_route: kpr_add_route, - kprci_del_route: kpr_del_route, - kprci_get_route: kpr_get_route, - kprci_notify: kpr_sys_notify, -}; - int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) { @@ -637,7 +630,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, int kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid, - int alive, time_t when) + int alive, time_t when) { return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when)); } @@ -696,8 +689,8 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, } int -kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive) +kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid, + ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive) { struct list_head *e; @@ -725,11 +718,67 @@ kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, return (-ENOENT); } +static int +kpr_nal_cmd(struct portals_cfg *pcfg, void * private) +{ + int err = -EINVAL; + ENTRY; + + switch(pcfg->pcfg_command) { + default: + CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); + break; + + case NAL_CMD_ADD_ROUTE: + CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", + pcfg->pcfg_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + break; + + case NAL_CMD_DEL_ROUTE: + CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - 
"LPU64"\n", + pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + break; + + case NAL_CMD_NOTIFY_ROUTER: { + CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", + pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_flags ? "Enabling" : "Disabling", + (time_t)pcfg->pcfg_nid3); + + err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3); + break; + } + + case NAL_CMD_GET_ROUTE: + CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); + err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, + &pcfg->pcfg_nid, + &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, + &pcfg->pcfg_flags); + break; + } + RETURN(err); +} + + static void /*__exit*/ kpr_finalise (void) { LASSERT (list_empty (&kpr_nals)); + libcfs_nal_cmd_unregister(ROUTER); + + PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); + + kpr_proc_fini(); + while (!list_empty (&kpr_routes)) { kpr_route_entry_t *re = list_entry(kpr_routes.next, kpr_route_entry_t, @@ -739,11 +788,6 @@ kpr_finalise (void) PORTAL_FREE(re, sizeof (*re)); } - kpr_proc_fini(); - - PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); - PORTAL_SYMBOL_UNREGISTER(kpr_control_interface); - CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n", atomic_read(&portal_kmemory)); } @@ -751,13 +795,20 @@ kpr_finalise (void) static int __init kpr_initialise (void) { + int rc; + CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", atomic_read(&portal_kmemory)); kpr_proc_init(); + rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL); + if (rc != 0) { + CERROR("Can't register nal cmd handler\n"); + return (rc); + } + PORTAL_SYMBOL_REGISTER(kpr_router_interface); - PORTAL_SYMBOL_REGISTER(kpr_control_interface); return (0); } @@ -768,5 +819,4 @@ MODULE_LICENSE("GPL"); module_init (kpr_initialise); module_exit (kpr_finalise); -EXPORT_SYMBOL (kpr_control_interface); EXPORT_SYMBOL (kpr_router_interface); diff --git 
a/lustre/portals/router/router.h b/lustre/portals/router/router.h index 309025b3f6f76c42c4a43c110253f7cadd31e35a..0787064c502a4515eb4d9db0180b9724bb7b7ffb 100644 --- a/lustre/portals/router/router.h +++ b/lustre/portals/router/router.h @@ -93,15 +93,6 @@ extern void kpr_deregister_nal (void *arg); extern void kpr_proc_init (void); extern void kpr_proc_fini (void); -extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); -extern int kpr_del_route (int gw_nal, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi); -extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive); -extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when); - extern unsigned long long kpr_fwd_bytes; extern unsigned long kpr_fwd_packets; extern unsigned long kpr_fwd_errors; diff --git a/lustre/portals/tests/ping_cli.c b/lustre/portals/tests/ping_cli.c index 9977f206720cbb50e91f7c4fa06283a93d491424..b216df16c589451fee41e398bbb01f46dc0852c5 100644 --- a/lustre/portals/tests/ping_cli.c +++ b/lustre/portals/tests/ping_cli.c @@ -46,7 +46,7 @@ static struct pingcli_data *client = NULL; static int count = 0; static void -pingcli_shutdown(int err) +pingcli_shutdown(ptl_handle_ni_t nih, int err) { int rc; @@ -70,7 +70,7 @@ pingcli_shutdown(int err) if ((rc = PtlMEUnlink (client->me))) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (client->args->ioc_nal); + PtlNIFini(nih); case 4: /* Free our buffers */ @@ -84,7 +84,7 @@ pingcli_shutdown(int err) CDEBUG (D_OTHER, "ping client released resources\n"); } /* pingcli_shutdown() */ -static int pingcli_callback(ptl_event_t *ev) +static void pingcli_callback(ptl_event_t *ev) { int i, magic; i = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned)); @@ -92,21 +92,19 @@ static int pingcli_callback(ptl_event_t *ev) if(magic != 0xcafebabe) { printk ("LustreError: Unexpected response \n"); - return 1; } if((i == 
count) || !count) wake_up_process (client->tsk); else printk ("LustreError: Received response after timeout for %d\n",i); - return 1; } static struct pingcli_data * pingcli_start(struct portal_ioctl_data *args) { - ptl_handle_ni_t *nip; + ptl_handle_ni_t nih = PTL_INVALID_HANDLE; unsigned ping_head_magic = PING_HEADER_MAGIC; unsigned ping_bulk_magic = PING_BULK_MAGIC; int rc; @@ -127,7 +125,7 @@ pingcli_start(struct portal_ioctl_data *args) if (client->outbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } @@ -136,23 +134,24 @@ pingcli_start(struct portal_ioctl_data *args) if (client->inbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) + rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR ("NAL %d not loaded\n", args->ioc_nal); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Based on the initialization aquire our unique portal ID. 
*/ - if ((rc = PtlGetId (*nip, &client->myid))) + if ((rc = PtlGetId (nih, &client->myid))) { CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -164,20 +163,20 @@ pingcli_start(struct portal_ioctl_data *args) client->id_remote.nid = args->ioc_nid; client->id_remote.pid = 0; - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, + if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, client->id_local, 0, ~0, PTL_RETAIN, PTL_INS_AFTER, &client->me))) { CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) + if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) { CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -196,7 +195,7 @@ pingcli_start(struct portal_ioctl_data *args) if ((rc = PtlMDAttach (client->me, client->md_in_head, PTL_UNLINK, &client->md_in_head_h))) { CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } /* Setup the outgoing ping header */ @@ -212,10 +211,10 @@ pingcli_start(struct portal_ioctl_data *args) count = 0; /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, + if ((rc=PtlMDBind (nih, client->md_out_head, PTL_UNLINK, &client->md_out_head_h))) { CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } while ((args->ioc_count - count)) { @@ -230,7 +229,7 @@ pingcli_start(struct portal_ioctl_data *args) if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } printk ("Lustre: sent msg no %d", count); @@ -255,7 +254,7 @@ pingcli_start(struct portal_ioctl_data *args) PORTAL_FREE 
(client->inbuf, (args->ioc_size + STDSIZE) * args->ioc_count); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); /* Success! */ return NULL; diff --git a/lustre/portals/tests/ping_srv.c b/lustre/portals/tests/ping_srv.c index 0aa1ea7e08dbd017c4ead862625856444a960810..84da81489a8a32d153b3dfb4d49f95310922cda4 100644 --- a/lustre/portals/tests/ping_srv.c +++ b/lustre/portals/tests/ping_srv.c @@ -81,7 +81,7 @@ static void *pingsrv_shutdown(int err) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (nal); + PtlNIFini (server->ni); case 4: @@ -167,19 +167,18 @@ int pingsrv_thread(void *arg) return 0; } -static int pingsrv_packet(ptl_event_t *ev) +static void pingsrv_packet(ptl_event_t *ev) { atomic_inc (&pkt); wake_up_process (server->tsk); - return 1; } /* pingsrv_head() */ -static int pingsrv_callback(ptl_event_t *ev) +static void pingsrv_callback(ptl_event_t *ev) { if (ev == NULL) { CERROR ("null in callback, ev=%p\n", ev); - return 0; + return; } server->evnt = *ev; @@ -193,23 +192,24 @@ static int pingsrv_callback(ptl_event_t *ev) packets_valid++; - return pingsrv_packet(ev); + pingsrv_packet(ev); } /* pingsrv_callback() */ static struct pingsrv_data *pingsrv_setup(void) { - ptl_handle_ni_t *nip; int rc; + server->ni = PTL_INVALID_HANDLE; + /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { + rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) { CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); return pingsrv_shutdown (4); } - server->ni= *nip; /* Based on the initialization aquire our unique portal ID. 
*/ if ((rc = PtlGetId (server->ni, &server->my_id))) { @@ -229,7 +229,7 @@ static struct pingsrv_data *pingsrv_setup(void) } - if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback, + if ((rc = PtlEQAlloc (server->ni, 1024, &pingsrv_callback, &server->eq))) { PDEBUG ("PtlEQAlloc (callback)", rc); return pingsrv_shutdown (2); diff --git a/lustre/portals/tests/sping_cli.c b/lustre/portals/tests/sping_cli.c index 663da4ef54e3a1cfd1c66be6912e0b4d900e7b62..8e8649195dd8da312f7cf5c3fa28d8cecce6e0da 100644 --- a/lustre/portals/tests/sping_cli.c +++ b/lustre/portals/tests/sping_cli.c @@ -51,7 +51,7 @@ static struct pingcli_data *client = NULL; static int count = 0; static void -pingcli_shutdown(int err) +pingcli_shutdown(ptl_handle_ni_t nih, int err) { int rc; @@ -72,7 +72,7 @@ pingcli_shutdown(int err) if ((rc = PtlMEUnlink (client->me))) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (client->args->ioc_nal); + PtlNIFini (nih); case 4: /* Free our buffers */ @@ -92,17 +92,16 @@ pingcli_shutdown(int err) CDEBUG (D_OTHER, "ping client released resources\n"); } /* pingcli_shutdown() */ -static int pingcli_callback(ptl_event_t *ev) +static void pingcli_callback(ptl_event_t *ev) { - wake_up_process (client->tsk); - return 1; + wake_up_process (client->tsk); } static struct pingcli_data * pingcli_start(struct portal_ioctl_data *args) { - const ptl_handle_ni_t *nip; + ptl_handle_ni_t nih = PTL_INVALID_HANDLE; unsigned ping_head_magic = PING_HEADER_MAGIC; char str[PTL_NALFMT_SIZE]; int rc; @@ -122,7 +121,7 @@ pingcli_start(struct portal_ioctl_data *args) if (client->outbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } @@ -131,23 +130,24 @@ pingcli_start(struct portal_ioctl_data *args) if (client->inbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Aquire and initialize the 
proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) + rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR ("NAL %d not loaded.\n", args->ioc_nal); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) + if ((rc = PtlGetId (nih, &client->myid))) { CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -159,20 +159,20 @@ pingcli_start(struct portal_ioctl_data *args) client->id_remote.nid = args->ioc_nid; client->id_remote.pid = 0; - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, + if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, client->id_local, 0, ~0, PTL_RETAIN, PTL_INS_AFTER, &client->me))) { CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) + if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) { CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -189,7 +189,7 @@ pingcli_start(struct portal_ioctl_data *args) if ((rc = PtlMDAttach (client->me, client->md_in_head, PTL_UNLINK, &client->md_in_head_h))) { CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } @@ -204,17 +204,17 @@ pingcli_start(struct portal_ioctl_data *args) memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, + if ((rc=PtlMDBind (nih, client->md_out_head, PTL_UNLINK, &client->md_out_head_h))) { CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } /* Put the ping packet */ if((rc = PtlPut 
(client->md_out_head_h, PTL_NOACK_REQ, client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } @@ -223,13 +223,13 @@ pingcli_start(struct portal_ioctl_data *args) rc = schedule_timeout (20 * args->ioc_timeout); if (rc == 0) { printk ("LustreError: Time out on the server\n"); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return NULL; } else printk("Lustre: Received respose from the server \n"); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); /* Success! */ return NULL; diff --git a/lustre/portals/tests/sping_srv.c b/lustre/portals/tests/sping_srv.c index e8fb47051c19c8ee68c7bd9e7d143d85f2efd714..9e731261cc322c1c536b87bf9c83fa78f80bf57c 100644 --- a/lustre/portals/tests/sping_srv.c +++ b/lustre/portals/tests/sping_srv.c @@ -53,7 +53,7 @@ #define STDSIZE (sizeof(int) + sizeof(int) + 4) -static int nal = 0; // Your NAL, +static int nal = PTL_IFACE_DEFAULT; // Your NAL, static unsigned long packets_valid = 0; // Valid packets static int running = 1; atomic_t pkt; @@ -86,7 +86,7 @@ static void *pingsrv_shutdown(int err) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (nal); + PtlNIFini(server->ni); case 4: @@ -159,19 +159,18 @@ int pingsrv_thread(void *arg) return 0; } -static int pingsrv_packet(ptl_event_t *ev) +static void pingsrv_packet(ptl_event_t *ev) { atomic_inc (&pkt); wake_up_process (server->tsk); - return 1; } /* pingsrv_head() */ -static int pingsrv_callback(ptl_event_t *ev) +static void pingsrv_callback(ptl_event_t *ev) { if (ev == NULL) { CERROR ("null in callback, ev=%p\n", ev); - return 0; + return; } server->evnt = *ev; @@ -182,24 +181,24 @@ static int pingsrv_callback(ptl_event_t *ev) packets_valid++; - return pingsrv_packet(ev); + pingsrv_packet(ev); } /* pingsrv_callback() */ static struct pingsrv_data *pingsrv_setup(void) { - ptl_handle_ni_t *nip; int rc; /* Aquire and initialize the proper nal for portals. 
*/ - if ((nip = kportal_get_ni (nal)) == NULL) { + server->ni = PTL_INVALID_HANDLE; + + rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); return pingsrv_shutdown (4); } - server->ni= *nip; - /* Based on the initialization aquire our unique portal ID. */ if ((rc = PtlGetId (server->ni, &server->my_id))) { PDEBUG ("PtlGetId", rc); diff --git a/lustre/portals/unals/bridge.h b/lustre/portals/unals/bridge.h index 9a90ab8d8dd705c2d7523d007a824c4c12d857ec..90ce3244fed2d900e63ed740e9d82f434db9c368 100644 --- a/lustre/portals/unals/bridge.h +++ b/lustre/portals/unals/bridge.h @@ -10,6 +10,12 @@ #define TCPNAL_PROCBRIDGE_H #include <portals/lib-p30.h> +#include <portals/nal.h> + +#define PTL_IFACE_TCP 1 +#define PTL_IFACE_ER 2 +#define PTL_IFACE_SS 3 +#define PTL_IFACE_MAX 4 typedef struct bridge { int alive; @@ -22,12 +28,6 @@ typedef struct bridge { } *bridge; -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - typedef int (*nal_initialize)(bridge); extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lustre/portals/unals/procapi.c b/lustre/portals/unals/procapi.c index 00a7ae4deb60cf4e1cf6d6f6b7a2cae5a33cc60f..e40c4b9c9b1413a7f0c7cbf994317376aa0e12a9 100644 --- a/lustre/portals/unals/procapi.c +++ b/lustre/portals/unals/procapi.c @@ -95,7 +95,7 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, * cleanup nal state, reclaim the lower side thread and * its state using PTL_FINI codepoint */ -static int procbridge_shutdown(nal_t *n, int ni) +static void procbridge_shutdown(nal_t *n) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; @@ -114,16 +114,6 @@ static int procbridge_shutdown(nal_t *n, int ni) } while (1); free(p); - return(0); -} - - -/* Function: validate - * useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, size_t 
extent) -{ - return(0); } @@ -187,18 +177,20 @@ static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds) return (milliseconds); } +/* forward decl */ +extern int procbridge_startup (nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); /* api_nal * the interface vector to allow the generic code to access * this nal. this is seperate from the library side nal_cb. * TODO: should be dyanmically allocated */ -static nal_t api_nal = { - ni: {0}, +nal_t procapi_nal = { nal_data: NULL, - forward: procbridge_forward, + startup: procbridge_startup, shutdown: procbridge_shutdown, - validate: procbridge_validate, + forward: procbridge_forward, yield: procbridge_yield, lock: procbridge_lock, unlock: procbridge_unlock @@ -206,7 +198,7 @@ static nal_t api_nal = { ptl_nid_t tcpnal_mynid; -/* Function: procbridge_interface +/* Function: procbridge_startup * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -214,40 +206,34 @@ ptl_nid_t tcpnal_mynid; * and effectively ignored * actual: limits actually allocated and returned * - * Returns: a pointer to my statically allocated top side NAL - * structure + * Returns: portals rc * * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. 
*/ -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) +int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { nal_init_args_t args; + procbridge p; bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; + /* XXX nal_type is purely private to tcpnal here */ int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - if(initialized) return (&api_nal); + LASSERT(nal == &procapi_nal); init_unix_timer(); b=(bridge)malloc(sizeof(struct bridge)); p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; + nal->nal_data=b; b->local=p; - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - args.nia_requested_pid = requested_pid; - args.nia_limits = &limits; + args.nia_requested_limits = requested_limits; + args.nia_actual_limits = actual_limits; args.nia_nal_type = nal_type; args.nia_bridge = b; @@ -259,19 +245,19 @@ nal_t *procbridge_interface(int num_interface, /* initialize notifier */ if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { perror("socketpair failed"); - return NULL; + return PTL_FAIL; } if (!register_io_handler(p->notifier[1], READ_HANDLER, procbridge_notifier_handler, p)) { perror("fail to register notifier handler"); - return NULL; + return PTL_FAIL; } /* create nal thread */ if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); - return(NULL); + return PTL_FAIL; } do { @@ -285,10 +271,9 @@ nal_t *procbridge_interface(int num_interface, } while (1); if (p->nal_flags & NAL_FLAG_STOPPED) - return (NULL); + return PTL_FAIL; b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - return (&api_nal); + return PTL_OK; } diff --git a/lustre/portals/unals/procbridge.h b/lustre/portals/unals/procbridge.h index 
965f83d6a0d2b3017ecbd7534bb7f1a5d4ee35bd..1c8e7dd87e10e9dccec6dc171186ac3f6b9b2a23 100644 --- a/lustre/portals/unals/procbridge.h +++ b/lustre/portals/unals/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lustre/portals/unals/proclib.c b/lustre/portals/unals/proclib.c index 1cfb233e97ff2af09b444925e0b45960c4fcd0b7..af0745b72a6b81c4cc968dc03614d2c4f09accde 100644 --- a/lustre/portals/unals/proclib.c +++ b/lustre/portals/unals/proclib.c @@ -157,9 +157,6 @@ static void check_stopping(void *z) * We define a limit macro to place a ceiling on limits * for syntactic convenience */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - extern int tcpnal_init(bridge); nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; @@ -170,10 +167,8 @@ void *nal_thread(void *z) bridge b = args->nia_bridge; procbridge p=b->local; int rc; - ptl_pid_t pid_request; + ptl_process_id_t process_id; int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); b->nal_cb->nal_data=b; @@ -189,28 +184,21 @@ void *nal_thread(void *z) b->nal_cb->cb_callback=nal_callback; b->nal_cb->cb_dist=nal_dist; - pid_request = args->nia_requested_pid; - desired = *args->nia_limits; nal_type = args->nia_nal_type; - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - 
LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); + /* Weird, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is + * about to do from the process_id passed to it...*/ + set_address(b,args->nia_requested_pid); + process_id.pid = b->nal_cb->ni.pid; + process_id.nid = b->nal_cb->ni.nid; + if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); /* initialize the generic 'library' level code */ - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); + rc = lib_init(b->nal_cb, process_id, + args->nia_requested_limits, + args->nia_actual_limits); /* * Whatever the initialization returned is passed back to the @@ -219,11 +207,11 @@ void *nal_thread(void *z) */ /* this should perform error checking */ pthread_mutex_lock(&p->mutex); - p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); - if (!rc) { + if (rc == PTL_OK) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. 
we overload this function to inform the api side that @@ -233,4 +221,3 @@ void *nal_thread(void *z) } return(0); } -#undef LIMIT diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index fb031ae1b52070b5511587d573a8ed281d4a6920..e42fda69d2cd8fd369a1ab9c5f7a89af965f9ff7 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -61,7 +61,7 @@ unsigned int portal_debug; unsigned int portal_printk; unsigned int portal_stack; -unsigned int portal_cerror; +unsigned int portal_cerror = 1; static unsigned int g_nal = 0; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index b328b5468ab43c31cd6865157d5c3858321270ea..5995e31e8b1582181859b4622e8f0109a78e24e5 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -160,7 +160,7 @@ void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, desc->bd_nob += len; - pers_bulk_add_page(desc, page, pageoffset, len); + ptlrpc_add_bulk_page(desc, page, pageoffset, len); } void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index b39d67340b5f1710467704b02160347cbf40947a..d29804d33c1d8447695fd7c9249b344aab67015b 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -319,7 +319,7 @@ void server_bulk_callback (ptl_event_t *ev) EXIT; } -static int ptlrpc_master_callback(ptl_event_t *ev) +static void ptlrpc_master_callback(ptl_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->mem_desc.user_ptr; void (*callback)(ptl_event_t *ev) = cbid->cbid_fn; @@ -334,35 +334,32 @@ static int ptlrpc_master_callback(ptl_event_t *ev) callback == server_bulk_callback); callback (ev); - return (0); } int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) { struct ptlrpc_ni *pni; + __u32 peer_nal; ptl_nid_t peer_nid; - ptl_handle_ni_t peer_ni; int i; - char str[20]; + char str[PTL_NALFMT_SIZE]; int rc = lustre_uuid_to_peer(uuid->uuid, - &peer_ni, &peer_nid); + &peer_nal, &peer_nid); if (rc != 0) 
RETURN (rc); for (i = 0; i < ptlrpc_ninterfaces; i++) { pni = &ptlrpc_interfaces[i]; - if (!memcmp(&peer_ni, &pni->pni_ni_h, - sizeof (peer_ni))) { + if (pni->pni_number == peer_nal) { peer->peer_nid = peer_nid; peer->peer_ni = pni; return (0); } } - PtlSnprintHandle(str, sizeof(str), peer_ni); - CERROR("Can't find ptlrpc interface for "LPX64" ni %s\n", - peer_nid, str); + CERROR("Can't find ptlrpc interface for NAL %d, NID %s\n", + peer_nal, portals_nid2str(peer_nal, peer_nid, str)); return (-ENOENT); } @@ -385,7 +382,7 @@ void ptlrpc_ni_fini(struct ptlrpc_ni *pni) LBUG(); case PTL_OK: - kportal_put_ni (pni->pni_number); + PtlNIFini(pni->pni_ni_h); return; case PTL_EQ_IN_USE: @@ -407,25 +404,48 @@ int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) { int rc; char str[20]; - ptl_handle_ni_t *nip = kportal_get_ni (number); + ptl_handle_ni_t nih; - if (nip == NULL) { - CDEBUG (D_NET, "Network interface %s not loaded\n", name); + /* We're not passing any limits yet... */ + rc = PtlNIInit(number, 0, NULL, NULL, &nih); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + CDEBUG (D_NET, "Can't init network interface %s: %d\n", + name, rc); return (-ENOENT); } - PtlSnprintHandle(str, sizeof(str), *nip); + PtlSnprintHandle(str, sizeof(str), nih); CDEBUG (D_NET, "init %d %s: %s\n", number, name, str); pni->pni_name = name; pni->pni_number = number; - pni->pni_ni_h = *nip; + pni->pni_ni_h = nih; pni->pni_eq_h = PTL_INVALID_HANDLE; - rc = PtlEQAlloc(pni->pni_ni_h, PTLRPC_NUM_EQ, PTLRPC_EQ_CALLBACK, + /* CAVEAT EMPTOR: how we process portals events is _radically_ + * different depending on... */ +#ifdef __KERNEL__ + /* kernel portals calls our master callback when events are added to + * the event queue. In fact lustre never pulls events off this queue, + * so it's only sized for some debug history. 
*/ + rc = PtlEQAlloc(pni->pni_ni_h, 1024, ptlrpc_master_callback, &pni->pni_eq_h); - +#else + /* liblustre calls the master callback when it removes events from the + * event queue. The event queue has to be big enough not to drop + * anything */ +# if CRAY_PORTALS + /* cray portals implements a non-standard callback to notify us there + * are buffered events even when the app is not doing a filesystem + * call. */ + rc = PtlEQAlloc(pni->pni_ni_h, 10240, cray_portals_callback, + &pni->pni_eq_h); +# else + rc = PtlEQAlloc(pni->pni_ni_h, 10240, PTL_EQ_HANDLER_NONE, + &pni->pni_eq_h); +# endif +#endif if (rc != PTL_OK) GOTO (fail, rc = -ENOMEM); @@ -533,7 +553,8 @@ liblustre_wait_event (int timeout) return found_something; } -static int cray_portals_callback(ptl_event_t *ev) +#ifdef CRAY_PORTALS +static void cray_portals_callback(ptl_event_t *ev) { /* We get a callback from the client Cray portals implementation * whenever anyone calls PtlEQPoll(), and an event queue with a @@ -545,13 +566,20 @@ static int cray_portals_callback(ptl_event_t *ev) * * Otherwise, we're already eagerly consuming events and we'd * handle events out of order if we recursed. */ - if (liblustre_waiting) - return; - - liblustre_wait_event(0); + if (!liblustre_waiting) + liblustre_wait_event(0); } +#endif #endif /* __KERNEL__ */ +int ptlrpc_default_nal(void) +{ + if (ptlrpc_ninterfaces == 0) + return (-ENOENT); + + return (ptlrpc_interfaces[0].pni_number); +} + int ptlrpc_init_portals(void) { /* Add new portals network interfaces here. 
@@ -565,7 +593,8 @@ int ptlrpc_init_portals(void) {GMNAL, "gmnal"}, {IBNAL, "ibnal"}, {TCPNAL, "tcpnal"}, - {SCIMACNAL, "scimacnal"}}; + {SCIMACNAL, "scimacnal"}, + {CRAY_KB_ERNAL, "cray_kb_ernal"}}; int rc; int i; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 2b10ac6e9391d407a43ef8b460c60615085f5887..9f8765b9e925a9c30d72d055fb690accf5bbec46 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -23,7 +23,6 @@ #define DEBUG_SUBSYSTEM S_RPC #ifndef __KERNEL__ #include <liblustre.h> -#include <portals/lib-types.h> #endif #include <linux/obd_support.h> #include <linux/lustre_net.h> @@ -93,20 +92,6 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, RETURN (0); } -static void ptlrpc_fill_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc) -{ - LASSERT(ptl_md_max_iovs() == 0 || - (desc->bd_iov_count <= ptl_md_max_iovs())); - - if (ptl_requires_iov() || desc->bd_iov_count > 0) { - md->options |= PTLRPC_PTL_MD_IOV; - md->start = &desc->bd_iov[0]; - md->niov = desc->bd_iov_count; - } else { - md->start = ptl_iov_base(&desc->bd_iov[0]); - } -} - int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) { int rc; @@ -127,13 +112,12 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) desc->bd_success = 0; peer = &desc->bd_export->exp_connection->c_peer; - md.length = desc->bd_nob; + md.user_ptr = &desc->bd_cbid; md.eventq = peer->peer_ni->pni_eq_h; md.threshold = 2; /* SENT and ACK/REPLY */ md.options = PTLRPC_MD_OPTIONS; + ptlrpc_fill_bulk_md(&md, desc); - ptlrpc_fill_md(&md, desc); - md.user_ptr = &desc->bd_cbid; LASSERT (desc->bd_cbid.cbid_fn == server_bulk_callback); LASSERT (desc->bd_cbid.cbid_arg == desc); @@ -154,8 +138,8 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) remote_id.pid = 0; CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d on %s " - "nid "LPX64" pid %d xid "LPX64"\n", - md.niov, md.length, desc->bd_portal, peer->peer_ni->pni_name, + "nid "LPX64" pid %d xid 
"LPX64"\n", desc->bd_iov_count, + desc->bd_nob, desc->bd_portal, peer->peer_ni->pni_name, remote_id.nid, remote_id.pid, xid); /* Network is about to get at the memory */ @@ -240,14 +224,14 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) peer = &desc->bd_import->imp_connection->c_peer; - md.length = desc->bd_nob; + md.user_ptr = &desc->bd_cbid; md.eventq = peer->peer_ni->pni_eq_h; md.threshold = 1; /* PUT or GET */ md.options = PTLRPC_MD_OPTIONS | ((desc->bd_type == BULK_GET_SOURCE) ? PTL_MD_OP_GET : PTL_MD_OP_PUT); - ptlrpc_fill_md(&md, desc); - md.user_ptr = &desc->bd_cbid; + ptlrpc_fill_bulk_md(&md, desc); + LASSERT (desc->bd_cbid.cbid_fn == client_bulk_callback); LASSERT (desc->bd_cbid.cbid_arg == desc); @@ -285,7 +269,7 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) CDEBUG(D_NET, "Setup bulk %s buffers: %u pages %u bytes, xid "LPX64", " "portal %u on %s\n", desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink", - md.niov, md.length, + desc->bd_iov_count, desc->bd_nob, req->rq_xid, desc->bd_portal, peer->peer_ni->pni_name); RETURN(0); } diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c index 7fcccd2f2c10f15c42c5787fc8f5f7e0e7c11512..fff71f0ee168d5eace888f3e2573e9e5a29a0623 100644 --- a/lustre/ptlrpc/pers.c +++ b/lustre/ptlrpc/pers.c @@ -36,8 +36,18 @@ #ifdef __KERNEL__ #ifndef CRAY_PORTALS -void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page, - int pageoffset, int len) +void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc) +{ + LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); + LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); + + md->options |= PTL_MD_KIOV; + md->start = &desc->bd_iov[0]; + md->length = desc->bd_iov_count; +} + +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, + int pageoffset, int len) { ptl_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count]; @@ -48,10 +58,20 @@ void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct 
page *page, desc->bd_iov_count++; } #else -void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page, - int pageoffset, int len) +void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc) +{ + LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); + LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); + + md->options |= (PTL_MD_IOVEC | PTL_MD_PHYS); + md->start = &desc->bd_iov[0]; + md->length = desc->bd_iov_count; +} + +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, + int pageoffset, int len) { - struct iovec *iov = &desc->bd_iov[desc->bd_iov_count]; + ptl_md_iovec_t *iov = &desc->bd_iov[desc->bd_iov_count]; /* Should get a compiler warning if sizeof(physaddr) > sizeof(void *) */ iov->iov_base = (void *)(page_to_phys(page) + pageoffset); @@ -62,17 +82,39 @@ void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page, #endif #else /* !__KERNEL__ */ +void ptlrpc_fill_bulk_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc) +{ + LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); -int can_merge_iovs(struct iovec *existing, struct iovec *candidate) + if (desc->bd_iov_count == 1) { + md->start = desc->bd_iov[0].iov_base; + md->length = desc->bd_iov[0].iov_len; + return; + } + +#if CRAY_PORTALS + LBUG(); +#endif + md->options |= PTL_MD_IOVEC; + md->start = &desc->bd_iov[0]; + md->length = desc->bd_iov_count; +} + +static int can_merge_iovs(ptl_md_iovec_t *existing, ptl_md_iovec_t *candidate) { - if (existing->iov_base + existing->iov_len == candidate->iov_base) + if (existing->iov_base + existing->iov_len == candidate->iov_base) return 1; + + CERROR("Can't merge iovs %p for %x, %p for %x\n", + existing->iov_base, existing->iov_len, + candidate->iov_base, candidate->iov_len); return 0; } -void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page, - int pageoffset, int len) + +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, + int 
pageoffset, int len) { - struct iovec *iov = &desc->bd_iov[desc->bd_iov_count]; + ptl_md_iovec_t *iov = &desc->bd_iov[desc->bd_iov_count]; iov->iov_base = page->addr + pageoffset; iov->iov_len = len; diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index d7c23785d3a58ed971dfec636ed1965403a01a45..42648cfb9c7328823a5f6626b2d26e8a2d4bdcf2 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -108,57 +108,15 @@ enum { int ptlrpc_expire_one_request(struct ptlrpc_request *req); -/* XXX these should be run-time checks so we can have one build run against - * many nals */ -#if defined(__KERNEL__) -#define ptl_requires_iov() 1 -#else -#define ptl_requires_iov() 0 -#endif - -#if defined(__KERNEL__) -# if defined(CRAY_PORTALS) -# define PTLRPC_PTL_MD_IOV (PTL_MD_IOVEC | PTL_MD_PHYS) -# else -# define PTLRPC_PTL_MD_IOV PTL_MD_KIOV -# endif -#else -# define PTLRPC_PTL_MD_IOV PTL_MD_IOVEC -#endif - #if !defined(__KERNEL__) && defined(CRAY_PORTALS) -#define ptl_md_max_iovs() 1 -#else -#define ptl_md_max_iovs() 0 /* unlimited */ -#endif - -/* XXX hopefully we can make the iov a consistent type across portals imps */ -#if defined(__KERNEL__) -#define ptl_iov_base(kiov) (NULL) /* this is meaningless */ -#else -#define ptl_iov_base(iov) ((iov)->iov_base) -#endif - -#ifdef __KERNEL__ -/* portals calls the callback when the event is added to the queue, so we don't - * care if we lose events */ -# define PTLRPC_NUM_EQ 1024 -# define PTLRPC_EQ_CALLBACK ptlrpc_master_callback -#else -/* liblustre: no callback, or only when app polls event queues, so allocate a - * nice big event queue to ensure we don't drop any */ -# define PTLRPC_NUM_EQ 10240 -# if CRAY_PORTALS -int cray_portals_callback(ptl_event_t *ev); -# define PTLRPC_EQ_CALLBACK cray_portals_callback -# else -# define PTLRPC_EQ_CALLBACK PTL_EQ_HANDLER_NONE -# endif +/* forward ref in events.c */ +static void cray_portals_callback(ptl_event_t *ev); #endif /* pers.c */ 
-void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page, - int pageoffset, int len); +void ptlrpc_fill_bulk_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc); +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, + int pageoffset, int len); /* pinger.c */ int ptlrpc_start_pinger(void);