diff --git a/libcfs/.empty b/libcfs/.empty deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/libcfs/autoconf/.empty/.empty b/libcfs/autoconf/.empty/.empty deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/lnet/.cvsignore b/lnet/.cvsignore deleted file mode 100644 index f30d8625f4ae34b5f3c6e405883d32db59ae42dc..0000000000000000000000000000000000000000 --- a/lnet/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -Kernelenv -Makefile -autoMakefile -autoMakefile.in -aclocal.m4 -autom4te.cache -config.log -config.status -configure -.*.cmd -.depend diff --git a/lnet/ChangeLog b/lnet/ChangeLog deleted file mode 100644 index 96d8d2545952f4bda374635111f9d73173032035..0000000000000000000000000000000000000000 --- a/lnet/ChangeLog +++ /dev/null @@ -1,629 +0,0 @@ -tbd Sun Microsystems, Inc. - * version 1.6.6 - * Support for networks: - socklnd - any kernel supported by Lustre, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1, 1.2.0, 1.2.5, and 1.3 - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - -Severity : -Bugzilla : -Description: -Details : - -Severity : normal -Bugzilla : 15272 -Description: ptl_send_rpc hits LASSERT when ptl_send_buf fails -Details : only hits under out-of-memory situations - - -------------------------------------------------------------------------------- - - -04-26-2008 Sun Microsystems, Inc. 
- * version 1.6.5 - * Support for networks: - socklnd - any kernel supported by Lustre, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1 and 1.2.0, 1.2.5 - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - -Severity : normal -Bugzilla : 14322 -Description: excessive debug information removed -Details : excessive debug information removed - -Severity : major -Bugzilla : 15712 -Description: ksocknal_create_conn() hit ASSERTION during connection race -Details : ksocknal_create_conn() hit ASSERTION during connection race - -Severity : major -Bugzilla : 13983 -Description: ksocknal_send_hello() hit ASSERTION while connecting race -Details : ksocknal_send_hello() hit ASSERTION while connecting race - -Severity : major -Bugzilla : 14425 -Description: o2iblnd/ptllnd credit deadlock in a routed config. -Details : o2iblnd/ptllnd credit deadlock in a routed config. - -Severity : normal -Bugzilla : 14956 -Description: High load after starting lnet -Details : gmlnd should sleep in rx thread in interruptible way. Otherwise, - uptime utility reports high load that looks confusingly. - -Severity : normal -Bugzilla : 14838 -Description: ksocklnd fails to establish connection if accept_port is high -Details : PID remapping must not be done for active (outgoing) connections - --------------------------------------------------------------------------------- - -2008-01-11 Sun Microsystems, Inc. 
- * version 1.4.12 - * Support for networks: - socklnd - any kernel supported by Lustre, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1 and 1.2.0, 1.2.5 - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x -Severity : normal -Bugzilla : 14387 -Description: liblustre network error -Details : liblustre clients should understand LNET_ACCEPT_PORT environment - variable even if they don't start lnet acceptor. - -Severity : normal -Bugzilla : 14300 -Description: Strange message from lnet (Ignoring prediction from the future) -Details : Incorrect calculation of peer's last_alive value in ksocklnd - --------------------------------------------------------------------------------- - -2007-12-07 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.6.4 - * Support for networks: - socklnd - any kernel supported by Lustre, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1 and 1.2.0, 1.2.5. - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - -Severity : normal -Bugzilla : 14238 -Description: ASSERTION(me == md->md_me) failed in lnet_match_md() - -Severity : normal -Bugzilla : 12494 -Description: increase send queue size for ciblnd/openiblnd - -Severity : normal -Bugzilla : 12302 -Description: new userspace socklnd -Details : Old userspace tcpnal that resided in lnet/ulnds/socklnd replaced - with new one - usocklnd. 
- -Severity : enhancement -Bugzilla : 11686 -Description: Console message flood -Details : Make cdls ratelimiting more tunable by adding several tunable in - procfs /proc/sys/lnet/console_{min,max}_delay_centisecs and - /proc/sys/lnet/console_backoff. - --------------------------------------------------------------------------------- - -2007-09-27 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.6.3 - * Support for networks: - socklnd - any kernel supported by Lustre, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1 and 1.2, - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - -Severity : normal -Bugzilla : 12782 -Description: /proc/sys/lnet has non-sysctl entries -Details : Updating dump_kernel/daemon_file/debug_mb to use sysctl variables - -Severity : major -Bugzilla : 13236 -Description: TOE Kernel panic by ksocklnd -Details : offloaded sockets provide their own implementation of sendpage, - can't call tcp_sendpage() directly - -Severity : normal -Bugzilla : 10778 -Description: kibnal_shutdown() doesn't finish; lconf --cleanup hangs -Details : races between lnd_shutdown and peer creation prevent - lnd_shutdown from finishing. - -Severity : normal -Bugzilla : 13279 -Description: open files rlimit 1024 reached while liblustre testing -Details : ulnds/socklnd must close open socket after unsuccessful - 'say hello' attempt. - -Severity : major -Bugzilla : 13482 -Description: build error -Details : fix typos in gmlnd, ptllnd and viblnd - ------------------------------------------------------------------------------- - -2007-07-30 Cluster File Systems, Inc. 
<info@clusterfs.com> - * version 1.6.1 - * Support for networks: - socklnd - kernels up to 2.6.16, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1 and 1.2 - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - -2007-06-21 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.11 - * Support for networks: - socklnd - kernels up to 2.6.16, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1 - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - -Severity : minor -Bugzilla : 13288 -Description: Initialize cpumask before use - -Severity : major -Bugzilla : 12014 -Description: ASSERTION failures when upgrading to the patchless zero-copy - socklnd -Details : This bug affects "rolling upgrades", causing an inconsistent - protocol version negotiation and subsequent assertion failure - during rolling upgrades after the first wave of upgrades. - -Severity : minor -Bugzilla : 11223 -Details : Change "dropped message" CERRORs to D_NETERROR so they are - logged instead of creating "console chatter" when a lustre - timeout races with normal RPC completion. - -Severity : minor -Details : lnet_clear_peer_table can wait forever if user forgets to - clear a lazy portal. - -Severity : minor -Details : libcfs_id2str should check pid against LNET_PID_ANY. - -Severity : major -Bugzilla : 10916 -Description: added LNET self test -Details : landing b_self_test - -Severity : minor -Frequency : rare -Bugzilla : 12227 -Description: cfs_duration_{u,n}sec() wrongly calculate nanosecond part of - struct timeval. -Details : do_div() macro is used incorrectly. 
- -2007-04-23 Cluster File Systems, Inc. <info@clusterfs.com> - -Severity : normal -Bugzilla : 11680 -Description: make panic on lbug configurable - -Severity : major -Bugzilla : 12316 -Description: Add OFED1.2 support to o2iblnd -Details : o2iblnd depends on OFED's modules, if out-tree OFED's modules - are installed (other than kernel's in-tree infiniband), there - could be some problem while insmod o2iblnd (mismatch CRC of - ib_* symbols). - If extra Module.symvers is supported in kernel (i.e, 2.6.17), - this link provides solution: - https://bugs.openfabrics.org/show_bug.cgi?id=355 - if extra Module.symvers is not supported in kernel, we will - have to run the script in bug 12316 to update - $LINUX/module.symvers before building o2iblnd. - More details about this are in bug 12316. - ------------------------------------------------------------------------------- - -2007-04-01 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.10 / 1.6.0 - * Support for networks: - socklnd - kernels up to 2.6.16, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1, - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - -Severity : minor -Frequency : rare -Description: Ptllnd didn't init kptllnd_data.kptl_idle_txs before it could be - possibly accessed in kptllnd_shutdown. Ptllnd should init - kptllnd_data.kptl_ptlid2str_lock before calling kptllnd_ptlid2str. - -Severity : normal -Frequency : rare -Description: gmlnd ignored some transmit errors when finalizing lnet messages. - -Severity : minor -Frequency : rare -Description: ptllnd logs a piece of incorrect debug info in kptllnd_peer_handle_hello. - -Severity : minor -Frequency : rare -Description: the_lnet.ln_finalizing was not set when the current thread is - about to complete messages. 
It only affects multi-threaded - user space LNet. - -Severity : normal -Frequency : rare -Bugzilla : 11472 -Description: Changed the default kqswlnd ntxmsg=512 - -Severity : major -Frequency : rare -Bugzilla : 12458 -Description: Assertion failure in kernel ptllnd caused by posting passive - bulk buffers before connection establishment complete. - -Severity : major -Frequency : rare -Bugzilla : 12445 -Description: A race in kernel ptllnd between deleting a peer and posting - new communications for it could hang communications - - manifesting as "Unexpectedly long timeout" messages. - -Severity : major -Frequency : rare -Bugzilla : 12432 -Description: Kernel ptllnd lock ordering issue could hang a node. - -Severity : major -Frequency : rare -Bugzilla : 12016 -Description: node crash on socket teardown race - -Severity : minor -Frequency : 'lctl peer_list' issued on a mx net -Bugzilla : 12237 -Description: Enable lctl's peer_list for MXLND - -Severity : major -Frequency : after Ptllnd timeouts and portals congestion -Bugzilla : 11659 -Description: Credit overflows -Details : This was a bug in ptllnd connection establishment. The fix - implements better peer stamps to disambiguate connection - establishment and ensure both peers enter the credit flow - state machine consistently. - -Severity : major -Frequency : rare -Bugzilla : 11394 -Description: kptllnd didn't propagate some network errors up to LNET -Details : This bug was spotted while investigating 11394. The fix - ensures network errors on sends and bulk transfers are - propagated to LNET/lustre correctly. - -Severity : enhancement -Bugzilla : 10316 -Description: Fixed console chatter in case of -ETIMEDOUT. - -Severity : enhancement -Bugzilla : 11684 -Description: Added D_NETTRACE for recording network packet history - (initially only for ptllnd). Also a separate userspace - ptllnd facility to gather history which should really be - covered by D_NETTRACE too, if only CDEBUG recorded history in - userspace. 
- -Severity : major -Frequency : rare -Bugzilla : 11616 -Description: o2iblnd handle early RDMA_CM_EVENT_DISCONNECTED. -Details : If the fabric is lossy, an RDMA_CM_EVENT_DISCONNECTED - callback can occur before a connection has actually been - established. This caused an assertion failure previously. - -Severity : enhancement -Bugzilla : 11094 -Description: Multiple instances for o2iblnd -Details : Allow multiple instances of o2iblnd to enable networking over - multiple HCAs and routing between them. - -Severity : major -Bugzilla : 11201 -Description: lnet deadlock in router_checker -Details : turned ksnd_connd_lock, ksnd_reaper_lock, and ksock_net_t:ksnd_lock - into BH locks to eliminate potential deadlock caused by - ksocknal_data_ready() preempting code holding these locks. - -Severity : major -Bugzilla : 11126 -Description: Millions of failed socklnd connection attempts cause a very slow FS -Details : added a new route flag ksnr_scheduled to distinguish from - ksnr_connecting, so that a peer connection request is only turned - down for race concerns when an active connection to the same peer - is under progress (instead of just being scheduled). - ------------------------------------------------------------------------------- - -2007-02-09 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.9 - * Support for networks: - socklnd - kernels up to 2.6.16 - qswlnd - Qsnet kernel modules 5.20 and later - openiblnd - IbGold 1.8.2 - o2iblnd - OFED 1.1 - viblnd - Voltaire ibhost 3.4.5 and later - ciblnd - Topspin 3.2.0 - iiblnd - Infiniserv 3.3 + PathBits patch - gmlnd - GM 2.1.22 and later - mxlnd - MX 1.2.1 or later - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - * bug fixes - -Severity : major on XT3 -Bugzilla : none -Description: libcfs overwrites /proc/sys/portals -Details : libcfs created a symlink from /proc/sys/portals to - /proc/sys/lnet for backwards compatibility. 
This is no - longer required and makes the Cray portals /proc variables - inaccessible. - -Severity : minor -Bugzilla : 11312 -Description: OFED FMR API change -Details : This changes parameter usage to reflect a change in - ib_fmr_pool_map_phys() between OFED 1.0 and OFED 1.1. Note - that FMR support is only used in experimental versions of the - o2iblnd - this change does not affect standard usage at all. - -Severity : enhancement -Bugzilla : 11245 -Description: new ko2iblnd module parameter: ib_mtu -Details : the default IB MTU of 2048 performs badly on 23108 Tavor - HCAs. You can avoid this problem by setting the MTU to 1024 - using this module parameter. - -Severity : enhancement -Bugzilla : 11118/11620 -Description: ptllnd small request message buffer alignment fix -Details : Set the PTL_MD_LOCAL_ALIGN8 option on small message receives. - Round up small message size on sends in case this option - is not supported. 11620 was a defect in the initial - implementation which effectively asserted all peers had to be - running the correct protocol version which was fixed by always - NAK-ing such requests and handling any misalignments they - introduce. - -Severity : minor -Frequency : rarely -Description: When kib(nal|lnd)_del_peer() is called upon a peer whose - ibp_tx_queue is not empty, kib(nal|lnd)_destroy_peer()'s - 'LASSERT(list_empty(&peer->ibp_tx_queue))' will fail. - -Severity : enhancement -Bugzilla : 11250 -Description: Patchless ZC(zero copy) socklnd -Details : New protocol for socklnd, socklnd can support zero copy without - kernel patch, it's compatible with old socklnd. Checksum is - moved from tunables to modparams. - -Severity : minor -Frequency : rarely -Description: When ksocknal_del_peer() is called upon a peer whose - ksnp_tx_queue is not empty, ksocknal_destroy_peer()'s - 'LASSERT(list_empty(&peer->ksnp_tx_queue))' will fail. 
- -Severity : normal -Frequency : when ptlrpc is under heavy use and runs out of request buffer -Bugzilla : 11318 -Description: In lnet_match_blocked_msg(), md can be used without holding a - ref on it. - -Severity : minor -Frequency : very rarely -Bugzilla : 10727 -Description: If ksocknal_lib_setup_sock() fails, a ref on peer is lost. - If connd connects a route which has been closed by - ksocknal_shutdown(), ksocknal_create_routes() may create new - routes which hold references on the peer, causing shutdown - process to wait for peer to disappear forever. - -Severity : enhancement -Bugzilla : 11234 -Description: Dump XT3 portals traces on kptllnd timeout -Details : Set the kptllnd module parameter "ptltrace_on_timeout=1" to - dump Cray portals debug traces to a file. The kptllnd module - parameter "ptltrace_basename", default "/tmp/lnet-ptltrace", - is the basename of the dump file. - -Severity : major -Frequency : infrequent -Bugzilla : 11308 -Description: kernel ptllnd fix bug in connection re-establishment -Details : Kernel ptllnd could produce protocol errors e.g. illegal - matchbits and/or violate the credit flow protocol when trying - to re-establish a connection with a peer after an error or - timeout. - -Severity : enhancement -Bugzilla : 10316 -Description: Allow /proc/sys/lnet/debug to be set symbolically -Details : Allow debug and subsystem debug values to be read/set by name - in addition to numerically, for ease of use. - -Severity : normal -Frequency : only in configurations with LNET routers -Bugzilla : 10316 -Description: routes automatically marked down and recovered -Details : In configurations with LNET routers if a router fails routers - now actively try to recover routes that are down, unless they - are marked down by an administrator. - ------------------------------------------------------------------------------- - -2006-12-09 Cluster File Systems, Inc. 
<info@clusterfs.com> - -Severity : critical -Frequency : very rarely, in configurations with LNET routers and TCP -Bugzilla : 10889 -Description: incorrect data written to files on OSTs -Details : In certain high-load conditions incorrect data may be written - to files on the OST when using TCP networks. - ------------------------------------------------------------------------------- - -2006-07-31 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.7 - - rework CDEBUG messages rate-limiting mechanism b=10375 - - add per-socket tunables for socklnd if the kernel is patched b=10327 - ------------------------------------------------------------------------------- - -2006-02-15 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.6 - - fix use of portals/lnet pid to avoid dropping RPCs b=10074 - - iiblnd wasn't mapping all memory, resulting in comms errors b=9776 - - quiet LNET startup LNI message for liblustre b=10128 - - Better console error messages if 'ip2nets' can't match an IP address - - Fixed overflow/use-before-set bugs in linux-time.h - - Fixed ptllnd bug that wasn't initialising rx descriptors completely - - LNET teardown failed an assertion about the route table being empty - - Fixed a crash in LNetEQPoll(<invalid handle>) - - Future protocol compatibility work (b_rls146_lnetprotovrsn) - - improve debug message for liblustre/Catamount nodes (b=10116) - -2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com> - * Configuration change for the XT3 - The PTLLND is now used to run Lustre over Portals on the XT3. - The configure option(s) --with-cray-portals are no longer - used. Rather --with-portals=<path-to-portals-includes> is - used to enable building on the XT3. In addition to enable - XT3 specific features the option --enable-cray-xt3 must be - used. - -2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com> - * Portals has been removed, replaced by LNET. 
- LNET is new networking infrastructure for Lustre, it includes a - reorganized network configuration mode (see the user - documentation for full details) as well as support for routing - between different network fabrics. Lustre Networking Devices - (LNDS) for the supported network fabrics have also been created - for this new infrastructure. - -2005-08-08 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.4 - * bug fixes - -Severity : major -Frequency : rare (large Voltaire clusters only) -Bugzilla : 6993 -Description: the default number of reserved transmit descriptors was too low - for some large clusters -Details : As a workaround, the number was increased. A proper fix includes - a run-time tunable. - -2005-06-02 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.3 - * bug fixes - -Severity : major -Frequency : occasional (large-scale events, cluster reboot, network failure) -Bugzilla : 6411 -Description: too many error messages on console obscure actual problem and - can slow down/panic server, or cause recovery to fail repeatedly -Details : enable rate-limiting of console error messages, and some messages - that were console errors now only go to the kernel log - -Severity : enhancement -Bugzilla : 1693 -Description: add /proc/sys/portals/catastrophe entry which will report if - that node has previously LBUGged - -2005-04-06 Cluster File Systems, Inc. <info@clusterfs.com> - * bugs - - update gmnal to use PTL_MTU, fix module refcounting (b=5786) - -2005-04-04 Cluster File Systems, Inc. <info@clusterfs.com> - * bugs - - handle error return code in kranal_check_fma_rx() (5915,6054) - -2005-02-04 Cluster File Systems, Inc. <info@clusterfs.com> - * miscellania - - update vibnal (Voltaire IB NAL) - - update gmnal (Myrinet NAL), gmnalid - -2005-02-04 Eric Barton <eeb@bartonsoftware.com> - - * Landed portals:b_port_step as follows... 
- - - removed CFS_DECL_SPIN* - just use 'spinlock_t' and initialise with spin_lock_init() - - - removed CFS_DECL_MUTEX* - just use 'struct semaphore' and initialise with init_mutex() - - - removed CFS_DECL_RWSEM* - just use 'struct rw_semaphore' and initialise with init_rwsem() - - - renamed cfs_sleep_chan -> cfs_waitq - cfs_sleep_link -> cfs_waitlink - - - fixed race in linux version of arch-independent socknal - (the ENOMEM/EAGAIN decision). - - - Didn't fix problems in Darwin version of arch-independent socknal - (resetting socket callbacks, eager ack hack, ENOMEM/EAGAIN decision) - - - removed libcfs types from non-socknal header files (only some types - in the header files had been changed; the .c files hadn't been - updated at all). diff --git a/lnet/Kernelenv.in b/lnet/Kernelenv.in deleted file mode 100644 index 59eda309e80c03d4b663988e5491c9a5b8df2796..0000000000000000000000000000000000000000 --- a/lnet/Kernelenv.in +++ /dev/null @@ -1,6 +0,0 @@ -EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include -# lnet/utils/debug.c wants <linux/version.h> from userspace. sigh. 
-HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS) -LIBREADLINE := @LIBREADLINE@ -# 2.5's makefiles aren't nice to cross dir libraries in host programs -PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lnet/Kernelenv.mk b/lnet/Kernelenv.mk deleted file mode 100644 index d973e5da2b76f671fbff82188c64dc90bc0aa0c0..0000000000000000000000000000000000000000 --- a/lnet/Kernelenv.mk +++ /dev/null @@ -1,4 +0,0 @@ -EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include -HOSTCFLAGS := $(EXTRA_CFLAGS) -# the kernel doesn't want us to build archives for host binaries :/ -PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lnet/LICENSE b/lnet/LICENSE deleted file mode 100644 index 92728f4d300d2b6d965a4f0aba46552b1831c118..0000000000000000000000000000000000000000 --- a/lnet/LICENSE +++ /dev/null @@ -1,363 +0,0 @@ -Each file in this distribution should contain a header stating the -copyright owner(s), and the licensing terms for that module. Some -files are not eligible for copyright protection, and contain neither. - -All files in this subtree are licensed under the terms and conditions -of the GNU General Public License version 2. - -Reproduced below is the GPL v2, and Linus's clarifying statement from -the Linux kernel source code: - ----------------------------------------- - - NOTE! This copyright does *not* cover user programs that use kernel - services by normal system calls - this is merely considered normal use - of the kernel, and does *not* fall under the heading of "derived work". - Also note that the GPL below is copyrighted by the Free Software - Foundation, but the instance of code that it refers to (the Linux - kernel) is copyrighted by me and others who actually wrote it. - - Linus Torvalds - ----------------------------------------- - - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. 
- - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. 
You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. 
If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. 
(This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. 
Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. 
If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. 
- - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) 19yy <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. 
Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. diff --git a/lnet/Makefile.in b/lnet/Makefile.in deleted file mode 100644 index 9109302c0d15f3ea437bf180a01ec903087b8b52..0000000000000000000000000000000000000000 --- a/lnet/Makefile.in +++ /dev/null @@ -1,8 +0,0 @@ -subdir-m += libcfs - -lnet-subdirs += lnet -lnet-subdirs += klnds -lnet-subdirs += selftest -subdir-m += $(lnet-subdirs) - -@INCLUDE_RULES@ diff --git a/lnet/autoMakefile.am b/lnet/autoMakefile.am deleted file mode 100644 index d8d062eb0858ea7e7699126cf68a49d6a0892091..0000000000000000000000000000000000000000 --- a/lnet/autoMakefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -SUBDIRS = libcfs lnet klnds ulnds selftest doc utils include \ - autoconf - -sources: - $(MAKE) sources -C libcfs diff --git a/lnet/autoconf/.cvsignore b/lnet/autoconf/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/autoconf/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/autoconf/Makefile.am b/lnet/autoconf/Makefile.am deleted file mode 100644 index 171634a23ad0c9b9cb19344224f42b48daffd317..0000000000000000000000000000000000000000 --- a/lnet/autoconf/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST := lustre-lnet.m4 diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 deleted file mode 100644 index 2fab63a56bc150c658fd034ea05be3947db9853b..0000000000000000000000000000000000000000 --- a/lnet/autoconf/lustre-lnet.m4 +++ /dev/null @@ -1,1561 +0,0 @@ -# -# LN_CONFIG_MAX_PAYLOAD -# -# configure maximum payload -# -AC_DEFUN([LN_CONFIG_MAX_PAYLOAD], -[AC_MSG_CHECKING([for non-default maximum LNET payload]) -AC_ARG_WITH([max-payload-mb], - AC_HELP_STRING([--with-max-payload-mb=MBytes], - [set maximum lnet payload in MBytes]), - [ - AC_MSG_RESULT([$with_max_payload_mb]) - LNET_MAX_PAYLOAD_MB=$with_max_payload_mb - LNET_MAX_PAYLOAD="(($with_max_payload_mb)<<20)" - ], [ - AC_MSG_RESULT([no]) - LNET_MAX_PAYLOAD="LNET_MTU" - ]) - AC_DEFINE_UNQUOTED(LNET_MAX_PAYLOAD, $LNET_MAX_PAYLOAD, - [Max LNET payload]) -]) - -# -# LN_CHECK_GCC_VERSION -# -# Check compiler version -# -AC_DEFUN([LN_CHECK_GCC_VERSION], -[AC_MSG_CHECKING([compiler version]) -PTL_CC_VERSION=`$CC --version | awk '/^gcc/{print $ 3}'` -PTL_MIN_CC_VERSION="3.2.2" -v2n() { - awk -F. 
'{printf "%d\n", (($ 1)*100+($ 2))*100+($ 3)}' -} -if test -z "$PTL_CC_VERSION" -o \ - `echo $PTL_CC_VERSION | v2n` -ge `echo $PTL_MIN_CC_VERSION | v2n`; then - AC_MSG_RESULT([ok]) -else - AC_MSG_RESULT([Buggy compiler found]) - AC_MSG_ERROR([Need gcc version >= $PTL_MIN_CC_VERSION]) -fi -]) - -# -# LN_CONFIG_CDEBUG -# -# whether to enable various libcfs debugs (CDEBUG, ENTRY/EXIT, LASSERT, etc.) -# -AC_DEFUN([LN_CONFIG_CDEBUG], -[ -AC_MSG_CHECKING([whether to enable CDEBUG, CWARN]) -AC_ARG_ENABLE([libcfs_cdebug], - AC_HELP_STRING([--disable-libcfs-cdebug], - [disable libcfs CDEBUG, CWARN]), - [],[enable_libcfs_cdebug='yes']) -AC_MSG_RESULT([$enable_libcfs_cdebug]) -if test x$enable_libcfs_cdebug = xyes; then - AC_DEFINE(CDEBUG_ENABLED, 1, [enable libcfs CDEBUG, CWARN]) -else - AC_DEFINE(CDEBUG_ENABLED, 0, [disable libcfs CDEBUG, CWARN]) -fi - -AC_MSG_CHECKING([whether to enable ENTRY/EXIT]) -AC_ARG_ENABLE([libcfs_trace], - AC_HELP_STRING([--disable-libcfs-trace], - [disable libcfs ENTRY/EXIT]), - [],[enable_libcfs_trace='yes']) -AC_MSG_RESULT([$enable_libcfs_trace]) -if test x$enable_libcfs_trace = xyes; then - AC_DEFINE(CDEBUG_ENTRY_EXIT, 1, [enable libcfs ENTRY/EXIT]) -else - AC_DEFINE(CDEBUG_ENTRY_EXIT, 0, [disable libcfs ENTRY/EXIT]) -fi - -AC_MSG_CHECKING([whether to enable LASSERT, LASSERTF]) -AC_ARG_ENABLE([libcfs_assert], - AC_HELP_STRING([--disable-libcfs-assert], - [disable libcfs LASSERT, LASSERTF]), - [],[enable_libcfs_assert='yes']) -AC_MSG_RESULT([$enable_libcfs_assert]) -if test x$enable_libcfs_assert = xyes; then - AC_DEFINE(LIBCFS_DEBUG, 1, [enable libcfs LASSERT, LASSERTF]) -fi -]) - -# -# LN_CONFIG_AFFINITY -# -# check if cpu affinity is available/wanted -# -AC_DEFUN([LN_CONFIG_AFFINITY], -[AC_ARG_ENABLE([affinity], - AC_HELP_STRING([--disable-affinity], - [disable process/irq affinity]), - [],[enable_affinity='yes']) - -AC_MSG_CHECKING([for CPU affinity support]) -if test x$enable_affinity = xno ; then - AC_MSG_RESULT([no (by request)]) -else - 
LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> - ],[ - struct task_struct t; - #if HAVE_CPUMASK_T - cpumask_t m; - #else - unsigned long m; - #endif - set_cpus_allowed(&t, m); - ],[ - AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support]) - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no (no kernel support)]) - ]) -fi -]) - -# -# LN_CONFIG_PORTALS -# -# configure support for Portals -# -AC_DEFUN([LN_CONFIG_PORTALS], -[AC_MSG_CHECKING([for portals]) -AC_ARG_WITH([portals], - AC_HELP_STRING([--with-portals=path], - [set path to portals]), - [ - case $with_portals in - no) ENABLEPORTALS=0 - ;; - *) PORTALS="${with_portals}" - ENABLEPORTALS=1 - ;; - esac - ], [ - ENABLEPORTALS=0 - ]) -PTLLNDCPPFLAGS="" -if test $ENABLEPORTALS -eq 0; then - AC_MSG_RESULT([no]) -elif test ! \( -f ${PORTALS}/include/portals/p30.h \); then - AC_MSG_RESULT([no]) - AC_MSG_ERROR([bad --with-portals path]) -else - AC_MSG_RESULT([$PORTALS]) - PTLLNDCPPFLAGS="-I${PORTALS}/include" -fi -AC_SUBST(PTLLNDCPPFLAGS) -]) - -# -# LN_CONFIG_BACKOFF -# -# check if tunable tcp backoff is available/wanted -# -AC_DEFUN([LN_CONFIG_BACKOFF], -[AC_MSG_CHECKING([for tunable backoff TCP support]) -AC_ARG_ENABLE([backoff], - AC_HELP_STRING([--disable-backoff], - [disable socknal tunable backoff]), - [],[enable_backoff='yes']) -if test x$enable_backoff = xno ; then - AC_MSG_RESULT([no (by request)]) -else - BOCD="`grep -c TCP_BACKOFF $LINUX/include/linux/tcp.h`" - if test "$BOCD" != 0 ; then - AC_DEFINE(SOCKNAL_BACKOFF, 1, [use tunable backoff TCP]) - AC_MSG_RESULT(yes) - else - AC_MSG_RESULT([no (no kernel support)]) - fi -fi -]) - -# -# LN_CONFIG_PANIC_DUMPLOG -# -# check if tunable panic_dumplog is wanted -# -AC_DEFUN([LN_CONFIG_PANIC_DUMPLOG], -[AC_MSG_CHECKING([for tunable panic_dumplog support]) -AC_ARG_ENABLE([panic_dumplog], - AC_HELP_STRING([--enable-panic_dumplog], - [enable panic_dumplog]), - [],[enable_panic_dumplog='no']) -if test x$enable_panic_dumplog = xyes ; then - 
AC_DEFINE(LNET_DUMP_ON_PANIC, 1, [use dumplog on panic]) - AC_MSG_RESULT([yes (by request)]) -else - AC_MSG_RESULT([no]) -fi -]) - -# -# LN_CONFIG_PTLLND -# -# configure support for Portals LND -# -AC_DEFUN([LN_CONFIG_PTLLND], -[ -if test -z "$ENABLEPORTALS"; then - LN_CONFIG_PORTALS -fi - -AC_MSG_CHECKING([whether to build the kernel portals LND]) - -PTLLND="" -if test $ENABLEPORTALS -ne 0; then - AC_MSG_RESULT([yes]) - PTLLND="ptllnd" -else - AC_MSG_RESULT([no]) -fi -AC_SUBST(PTLLND) -]) - -# -# LN_CONFIG_UPTLLND -# -# configure support for Portals LND -# -AC_DEFUN([LN_CONFIG_UPTLLND], -[ -if test -z "$ENABLEPORTALS"; then - LN_CONFIG_PORTALS -fi - -AC_MSG_CHECKING([whether to build the userspace portals LND]) - -UPTLLND="" -if test $ENABLEPORTALS -ne 0; then - AC_MSG_RESULT([yes]) - UPTLLND="ptllnd" -else - AC_MSG_RESULT([no]) -fi -AC_SUBST(UPTLLND) -]) - -# -# LN_CONFIG_USOCKLND -# -# configure support for userspace TCP/IP LND -# -AC_DEFUN([LN_CONFIG_USOCKLND], -[AC_MSG_CHECKING([whether to build usocklnd]) -AC_ARG_ENABLE([usocklnd], - AC_HELP_STRING([--disable-usocklnd], - [disable usocklnd]), - [],[enable_usocklnd='yes']) - -if test x$enable_usocklnd = xyes ; then - if test "$ENABLE_LIBPTHREAD" = "yes" ; then - AC_MSG_RESULT([yes]) - USOCKLND="usocklnd" - else - AC_MSG_RESULT([no (libpthread not present or disabled)]) - USOCKLND="" - fi -else - AC_MSG_RESULT([no (disabled explicitly)]) - USOCKLND="" -fi -AC_SUBST(USOCKLND) -]) - -# -# LN_CONFIG_QUADRICS -# -# check if quadrics support is in this kernel -# -AC_DEFUN([LN_CONFIG_QUADRICS], -[AC_MSG_CHECKING([for QsNet sources]) -AC_ARG_WITH([qsnet], - AC_HELP_STRING([--with-qsnet=path], - [set path to qsnet source (default=$LINUX)]), - [QSNET=$with_qsnet], - [QSNET=$LINUX]) -AC_MSG_RESULT([$QSNET]) - -AC_MSG_CHECKING([if quadrics kernel headers are present]) -if test -d $QSNET/drivers/net/qsnet ; then - AC_MSG_RESULT([yes]) - QSWLND="qswlnd" - AC_MSG_CHECKING([for multirail EKC]) - if test -f 
$QSNET/include/elan/epcomms.h; then - AC_MSG_RESULT([supported]) - QSWCPPFLAGS="-I$QSNET/include -DMULTIRAIL_EKC=1" - else - AC_MSG_RESULT([not supported]) - AC_MSG_ERROR([Need multirail EKC]) - fi - - if test x$QSNET = x$LINUX ; then - LB_LINUX_CONFIG([QSNET],[],[ - LB_LINUX_CONFIG([QSNET_MODULE],[],[ - AC_MSG_WARN([QSNET is not enabled in this kernel; not building qswlnd.]) - QSWLND="" - QSWCPPFLAGS="" - ]) - ]) - fi -else - AC_MSG_RESULT([no]) - QSWLND="" - QSWCPPFLAGS="" -fi -AC_SUBST(QSWCPPFLAGS) -AC_SUBST(QSWLND) -]) - -# -# LN_CONFIG_GM -# -# check if GM support is available -# -AC_DEFUN([LN_CONFIG_GM],[ -AC_MSG_CHECKING([whether to enable GM support]) -AC_ARG_WITH([gm], - AC_HELP_STRING([--with-gm=path-to-gm-source-tree], - [build gmlnd against path]), - [ - case $with_gm in - no) ENABLE_GM=0 - ;; - *) ENABLE_GM=1 - GM_SRC="$with_gm" - ;; - esac - ],[ - ENABLE_GM=0 - ]) -AC_ARG_WITH([gm-install], - AC_HELP_STRING([--with-gm-install=path-to-gm-install-tree], - [say where GM has been installed]), - [ - GM_INSTALL=$with_gm_install - ],[ - GM_INSTALL="/opt/gm" - ]) -if test $ENABLE_GM -eq 0; then - AC_MSG_RESULT([no]) -else - AC_MSG_RESULT([yes]) - - GMLND="gmlnd" - GMCPPFLAGS="-I$GM_SRC/include -I$GM_SRC/drivers -I$GM_SRC/drivers/linux/gm" - - if test -f $GM_INSTALL/lib/libgm.a -o \ - -f $GM_INSTALL/lib64/libgm.a; then - GMLIBS="-L$GM_INSTALL/lib -L$GM_INSTALL/lib64" - else - AC_MSG_ERROR([Cant find GM libraries under $GM_INSTALL]) - fi - - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$GMCPPFLAGS -DGM_KERNEL $EXTRA_KCFLAGS" - - AC_MSG_CHECKING([that code using GM compiles with given path]) - LB_LINUX_TRY_COMPILE([ - #define GM_STRONG_TYPES 1 - #ifdef VERSION - #undef VERSION - #endif - #include "gm.h" - #include "gm_internal.h" - ],[ - struct gm_port *port = NULL; - gm_recv_event_t *rxevent = gm_blocking_receive_no_spin(port); - return 0; - ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Bad --with-gm path]) - ]) - - 
AC_MSG_CHECKING([that GM has gm_register_memory_ex_phys()]) - LB_LINUX_TRY_COMPILE([ - #define GM_STRONG_TYPES 1 - #ifdef VERSION - #undef VERSION - #endif - #include "gm.h" - #include "gm_internal.h" - ],[ - gm_status_t gmrc; - struct gm_port *port = NULL; - gm_u64_t phys = 0; - gm_up_t pvma = 0; - - gmrc = gm_register_memory_ex_phys(port, phys, 100, pvma); - return 0; - ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no. -Please patch the GM sources as follows... - cd $GM_SRC - patch -p0 < $PWD/lnet/klnds/gmlnd/gm-reg-phys.patch -...then rebuild and re-install them]) - AC_MSG_ERROR([Can't build GM without gm_register_memory_ex_phys()]) - ]) - - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(GMCPPFLAGS) -AC_SUBST(GMLIBS) -AC_SUBST(GMLND) -]) - - -# -# LN_CONFIG_MX -# -AC_DEFUN([LN_CONFIG_MX], -[AC_MSG_CHECKING([whether to enable Myrinet MX support]) -# set default -MXPATH="/opt/mx" -AC_ARG_WITH([mx], - AC_HELP_STRING([--with-mx=path], - [build mxlnd against path]), - [ - case $with_mx in - yes) ENABLEMX=2 - ;; - no) ENABLEMX=0 - ;; - *) MXPATH=$with_mx - ENABLEMX=3 - ;; - esac - ],[ - ENABLEMX=1 - ]) -if test $ENABLEMX -eq 0; then - AC_MSG_RESULT([disabled]) -elif test ! 
\( -f ${MXPATH}/include/myriexpress.h -a \ - -f ${MXPATH}/include/mx_kernel_api.h -a \ - -f ${MXPATH}/include/mx_pin.h \); then - AC_MSG_RESULT([no]) - case $ENABLEMX in - 1) ;; - 2) AC_MSG_ERROR([Myrinet MX kernel headers not present]);; - 3) AC_MSG_ERROR([bad --with-mx path]);; - *) AC_MSG_ERROR([internal error]);; - esac -else - MXCPPFLAGS="-I$MXPATH/include" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $MXCPPFLAGS" - MXLIBS="-L$MXPATH/lib" - LB_LINUX_TRY_COMPILE([ - #define MX_KERNEL 1 - #include <mx_extensions.h> - #include <myriexpress.h> - ],[ - mx_endpoint_t end; - mx_status_t status; - mx_request_t request; - int result; - - mx_init(); - mx_open_endpoint(MX_ANY_NIC, MX_ANY_ENDPOINT, 0, NULL, 0, &end); - mx_register_unexp_handler(end, (mx_unexp_handler_t) NULL, NULL); - mx_wait_any(end, MX_INFINITE, 0LL, 0LL, &status, &result); - mx_iconnect(end, 0LL, 0, 0, 0, NULL, &request); - return 0; - ],[ - AC_MSG_RESULT([yes]) - MXLND="mxlnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEMX in - 1) ;; - 2) AC_MSG_ERROR([can't compile with Myrinet MX kernel headers]);; - 3) AC_MSG_ERROR([can't compile with Myrinet MX headers under $MXPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - MXLND="" - MXCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(MXCPPFLAGS) -AC_SUBST(MXLIBS) -AC_SUBST(MXLND) -]) - - - -# -# LN_CONFIG_O2IB -# -AC_DEFUN([LN_CONFIG_O2IB],[ -AC_MSG_CHECKING([whether to enable OpenIB gen2 support]) -# set default -AC_ARG_WITH([o2ib], - AC_HELP_STRING([--with-o2ib=path], - [build o2iblnd against path]), - [ - case $with_o2ib in - yes) O2IBPATHS="$LINUX $LINUX/drivers/infiniband" - ENABLEO2IB=2 - ;; - no) ENABLEO2IB=0 - ;; - *) O2IBPATHS=$with_o2ib - ENABLEO2IB=3 - ;; - esac - ],[ - O2IBPATHS="$LINUX $LINUX/drivers/infiniband" - ENABLEO2IB=1 - ]) -if test $ENABLEO2IB -eq 0; then - AC_MSG_RESULT([disabled]) -else - o2ib_found=false - for O2IBPATH in $O2IBPATHS; do - if test \( -f ${O2IBPATH}/include/rdma/rdma_cm.h 
-a \ - -f ${O2IBPATH}/include/rdma/ib_cm.h -a \ - -f ${O2IBPATH}/include/rdma/ib_verbs.h -a \ - -f ${O2IBPATH}/include/rdma/ib_fmr_pool.h \); then - o2ib_found=true - break - fi - done - if ! $o2ib_found; then - AC_MSG_RESULT([no]) - case $ENABLEO2IB in - 1) ;; - 2) AC_MSG_ERROR([kernel OpenIB gen2 headers not present]);; - 3) AC_MSG_ERROR([bad --with-o2ib path]);; - *) AC_MSG_ERROR([internal error]);; - esac - else - O2IBCPPFLAGS="-I$O2IBPATH/include" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $O2IBCPPFLAGS" - EXTRA_LNET_INCLUDE="$O2IBCPPFLAGS $EXTRA_LNET_INCLUDE" - LB_LINUX_TRY_COMPILE([ - #include <linux/version.h> - #include <linux/pci.h> - #if !HAVE_GFP_T - typedef int gfp_t; - #endif - #include <rdma/rdma_cm.h> - #include <rdma/ib_cm.h> - #include <rdma/ib_verbs.h> - #include <rdma/ib_fmr_pool.h> - ],[ - struct rdma_cm_id *cm_id; - struct rdma_conn_param conn_param; - struct ib_device_attr device_attr; - struct ib_qp_attr qp_attr; - struct ib_pool_fmr pool_fmr; - enum ib_cm_rej_reason rej_reason; - - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); - return PTR_ERR(cm_id); - ],[ - AC_MSG_RESULT([yes]) - O2IBLND="o2iblnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEO2IB in - 1) ;; - 2) AC_MSG_ERROR([can't compile with kernel OpenIB gen2 headers]);; - 3) AC_MSG_ERROR([can't compile with OpenIB gen2 headers under $O2IBPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - O2IBLND="" - O2IBCPPFLAGS="" - ]) - - # we know at this point that the found OFED source is good - if test \( $ENABLEO2IB = 3 \); then - if test \( -f $O2IBPATH/Module.symvers \); then - AC_MSG_NOTICE([adding $O2IBPATH/Module.symvers to $PWD/$SYMVERFILE]) - # strip out the existing symbols versions first - touch $O2IBPATH/Module.symvers - egrep -v $(echo $(awk '{ print $2 }' $O2IBPATH/Module.symvers) | tr ' ' '|') $PWD/$SYMVERFILE > $PWD/$SYMVERFILE.old - cat $PWD/$SYMVERFILE.old $O2IBPATH/Module.symvers > $PWD/$SYMVERFILE - else - AC_MSG_ERROR([an external source tree 
was specified for o2iblnd however I could not find a $O2IBPATH/Module.symvers there]) - fi - fi - - # version checking is a hack and isn't reliable, - # we need verify it with each new ofed release - - if grep -q ib_dma_map_single \ - ${O2IBPATH}/include/rdma/ib_verbs.h; then - if grep -q comp_vector \ - ${O2IBPATH}/include/rdma/ib_verbs.h; then - IBLND_OFED_VERSION="1025" - else - IBLND_OFED_VERSION="1020" - fi - else - IBLND_OFED_VERSION="1010" - fi - - AC_DEFINE_UNQUOTED(IBLND_OFED_VERSION, $IBLND_OFED_VERSION, - [OFED version]) - - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" - fi -fi - -AC_SUBST(EXTRA_LNET_INCLUDE) -AC_SUBST(O2IBCPPFLAGS) -AC_SUBST(O2IBLND) -]) - -# -# LN_CONFIG_OPENIB -# -# check for OpenIB in the kernel -AC_DEFUN([LN_CONFIG_OPENIB],[ -AC_MSG_CHECKING([whether to enable OpenIB support]) -# set default -OPENIBPATH="$LINUX/drivers/infiniband" -AC_ARG_WITH([openib], - AC_HELP_STRING([--with-openib=path], - [build openiblnd against path]), - [ - case $with_openib in - yes) ENABLEOPENIB=2 - ;; - no) ENABLEOPENIB=0 - ;; - *) OPENIBPATH="$with_openib" - ENABLEOPENIB=3 - ;; - esac - ],[ - ENABLEOPENIB=1 - ]) -if test $ENABLEOPENIB -eq 0; then - AC_MSG_RESULT([disabled]) -elif test ! 
\( -f ${OPENIBPATH}/include/ts_ib_core.h -a \ - -f ${OPENIBPATH}/include/ts_ib_cm.h -a \ - -f ${OPENIBPATH}/include/ts_ib_sa_client.h \); then - AC_MSG_RESULT([no]) - case $ENABLEOPENIB in - 1) ;; - 2) AC_MSG_ERROR([kernel OpenIB headers not present]);; - 3) AC_MSG_ERROR([bad --with-openib path]);; - *) AC_MSG_ERROR([internal error]);; - esac -else - case $ENABLEOPENIB in - 1|2) OPENIBCPPFLAGS="-I$OPENIBPATH/include -DIN_TREE_BUILD";; - 3) OPENIBCPPFLAGS="-I$OPENIBPATH/include";; - *) AC_MSG_RESULT([no]) - AC_MSG_ERROR([internal error]);; - esac - OPENIBCPPFLAGS="$OPENIBCPPFLAGS -DIB_NTXRXPARAMS=4" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <ts_ib_core.h> - #include <ts_ib_cm.h> - #include <ts_ib_sa_client.h> - ],[ - struct ib_device_properties dev_props; - struct ib_cm_active_param cm_active_params; - tTS_IB_CLIENT_QUERY_TID tid; - int enum1 = IB_QP_ATTRIBUTE_STATE; - int enum2 = IB_ACCESS_LOCAL_WRITE; - int enum3 = IB_CQ_CALLBACK_INTERRUPT; - int enum4 = IB_CQ_PROVIDER_REARM; - return 0; - ],[ - AC_MSG_RESULT([yes]) - OPENIBLND="openiblnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEOPENIB in - 1) ;; - 2) AC_MSG_ERROR([can't compile with kernel OpenIB headers]);; - 3) AC_MSG_ERROR([can't compile with OpenIB headers under $OPENIBPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - OPENIBLND="" - OPENIBCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(OPENIBCPPFLAGS) -AC_SUBST(OPENIBLND) -]) - -# -# LN_CONFIG_CIBLND -# -AC_DEFUN([LN_CONFIG_CIB],[ -AC_MSG_CHECKING([whether to enable Cisco/TopSpin IB support]) -# set default -CIBPATH="" -CIBLND="" -AC_ARG_WITH([cib], - AC_HELP_STRING([--with-cib=path], - [build ciblnd against path]), - [ - case $with_cib in - no) AC_MSG_RESULT([no]);; - *) CIBPATH="$with_cib" - if test -d "$CIBPATH"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([No directory $CIBPATH]) - fi;; - esac - ],[ - 
AC_MSG_RESULT([no]) - ]) -if test -n "$CIBPATH"; then - CIBCPPFLAGS="-I${CIBPATH}/ib/ts_api_ng/include -I${CIBPATH}/all/kernel_services/include -DUSING_TSAPI" - CIBCPPFLAGS="$CIBCPPFLAGS -DIB_NTXRXPARAMS=3" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $CIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <ts_ib_core.h> - #include <ts_ib_cm.h> - #include <ts_ib_sa_client.h> - ],[ - struct ib_device_properties dev_props; - struct ib_cm_active_param cm_active_params; - tTS_IB_CLIENT_QUERY_TID tid; - int enum1 = TS_IB_QP_ATTRIBUTE_STATE; - int enum2 = TS_IB_ACCESS_LOCAL_WRITE; - int enum3 = TS_IB_CQ_CALLBACK_INTERRUPT; - int enum4 = TS_IB_CQ_PROVIDER_REARM; - return 0; - ],[ - CIBLND="ciblnd" - ],[ - AC_MSG_ERROR([can't compile ciblnd with given path]) - CIBCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(CIBCPPFLAGS) -AC_SUBST(CIBLND) -]) - -# -# LN_CONFIG_IIB -# -# check for infinicon infiniband support -# -AC_DEFUN([LN_CONFIG_IIB],[ -AC_MSG_CHECKING([whether to enable Infinicon support]) -# set default -IIBPATH="/usr/include" -AC_ARG_WITH([iib], - AC_HELP_STRING([--with-iib=path], - [build iiblnd against path]), - [ - case $with_iib in - yes) ENABLEIIB=2 - ;; - no) ENABLEIIB=0 - ;; - *) IIBPATH="${with_iib}/include" - ENABLEIIB=3 - ;; - esac - ],[ - ENABLEIIB=1 - ]) -if test $ENABLEIIB -eq 0; then - AC_MSG_RESULT([disabled]) -elif test ! 
\( -f ${IIBPATH}/linux/iba/ibt.h \); then - AC_MSG_RESULT([no]) - case $ENABLEIIB in - 1) ;; - 2) AC_MSG_ERROR([default Infinicon headers not present]);; - 3) AC_MSG_ERROR([bad --with-iib path]);; - *) AC_MSG_ERROR([internal error]);; - esac -else - IIBCPPFLAGS="-I$IIBPATH" - if test $IIBPATH != "/usr/include"; then - # we need /usr/include come what may - IIBCPPFLAGS="$IIBCPPFLAGS -I/usr/include" - fi - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $IIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <linux/iba/ibt.h> - ],[ - IBT_INTERFACE_UNION interfaces; - FSTATUS rc; - - rc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2, - &interfaces); - - return rc == FSUCCESS ? 0 : 1; - ],[ - AC_MSG_RESULT([yes]) - IIBLND="iiblnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEIIB in - 1) ;; - 2) AC_MSG_ERROR([can't compile with default Infinicon headers]);; - 3) AC_MSG_ERROR([can't compile with Infinicon headers under $IIBPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - IIBLND="" - IIBCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(IIBCPPFLAGS) -AC_SUBST(IIBLND) -]) - -# -# LN_CONFIG_VIB -# -# check for Voltaire infiniband support -# -AC_DEFUN([LN_CONFIG_VIB], -[AC_MSG_CHECKING([whether to enable Voltaire IB support]) -VIBPATH="" -AC_ARG_WITH([vib], - AC_HELP_STRING([--with-vib=path], - [build viblnd against path]), - [ - case $with_vib in - no) AC_MSG_RESULT([no]);; - *) VIBPATH="${with_vib}/src/nvigor/ib-code" - if test -d "$with_vib" -a -d "$VIBPATH"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([No directory $VIBPATH]) - fi;; - esac - ],[ - AC_MSG_RESULT([no]) - ]) -if test -z "$VIBPATH"; then - VIBLND="" -else - VIBCPPFLAGS="-I${VIBPATH}/include -I${VIBPATH}/cm" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $VIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <linux/list.h> - #include <asm/byteorder.h> - #ifdef __BIG_ENDIAN - # define CPU_BE 1 - # define CPU_LE 0 - #endif - 
#ifdef __LITTLE_ENDIAN - # define CPU_BE 0 - # define CPU_LE 1 - #endif - #include <vverbs.h> - #include <ib-cm.h> - #include <ibat.h> - ],[ - vv_hca_h_t kib_hca; - vv_return_t vvrc; - cm_cep_handle_t cep; - ibat_arp_data_t arp_data; - ibat_stat_t ibatrc; - - vvrc = vv_hca_open("ANY_HCA", NULL, &kib_hca); - cep = cm_create_cep(cm_cep_transp_rc); - ibatrc = ibat_get_ib_data((uint32_t)0, (uint32_t)0, - ibat_paths_primary, &arp_data, - (ibat_get_ib_data_reply_fn_t)NULL, - NULL, 0); - return 0; - ],[ - VIBLND="viblnd" - ],[ - AC_MSG_ERROR([can't compile viblnd with given path]) - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -if test -n "$VIBLND"; then - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $VIBCPPFLAGS" - AC_MSG_CHECKING([if Voltaire still uses void * sg addresses]) - LB_LINUX_TRY_COMPILE([ - #include <linux/list.h> - #include <asm/byteorder.h> - #ifdef __BIG_ENDIAN - # define CPU_BE 1 - # define CPU_LE 0 - #endif - #ifdef __LITTLE_ENDIAN - # define CPU_BE 0 - # define CPU_LE 1 - #endif - #include <vverbs.h> - #include <ib-cm.h> - #include <ibat.h> - ],[ - vv_scatgat_t sg; - - return &sg.v_address[3] == NULL; - ],[ - AC_MSG_RESULT([yes]) - VIBCPPFLAGS="$VIBCPPFLAGS -DIBNAL_VOIDSTAR_SGADDR=1" - ],[ - AC_MSG_RESULT([no]) - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(VIBCPPFLAGS) -AC_SUBST(VIBLND) -]) - -# -# LN_CONFIG_RALND -# -# check whether to use the RapidArray lnd -# -AC_DEFUN([LN_CONFIG_RALND], -[#### Rapid Array -AC_MSG_CHECKING([if RapidArray kernel headers are present]) -# placeholder -RACPPFLAGS="-I${LINUX}/drivers/xd1/include" -EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" -EXTRA_KCFLAGS="$EXTRA_KCFLAGS $RACPPFLAGS" -LB_LINUX_TRY_COMPILE([ - #include <linux/types.h> - #include <rapl.h> -],[ - RAP_RETURN rc; - RAP_PVOID dev_handle; - - rc = RapkGetDeviceByIndex(0, NULL, &dev_handle); - - return rc == RAP_SUCCESS ? 
0 : 1; -],[ - AC_MSG_RESULT([yes]) - RALND="ralnd" -],[ - AC_MSG_RESULT([no]) - RALND="" - RACPPFLAGS="" -]) -EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -AC_SUBST(RACPPFLAGS) -AC_SUBST(RALND) -]) - -# -# LN_STRUCT_PAGE_LIST -# -# 2.6.4 no longer has page->list -# -AC_DEFUN([LN_STRUCT_PAGE_LIST], -[AC_MSG_CHECKING([if struct page has a list field]) -LB_LINUX_TRY_COMPILE([ - #include <linux/mm.h> -],[ - struct page page; - &page.list; -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_STRUCT_SIGHAND -# -# red hat 2.4 adds sighand to struct task_struct -# -AC_DEFUN([LN_STRUCT_SIGHAND], -[AC_MSG_CHECKING([if task_struct has a sighand field]) -LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> -],[ - struct task_struct p; - p.sighand = NULL; -],[ - AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches]) - AC_MSG_RESULT([yes]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_FUNC_CPU_ONLINE -# -# cpu_online is different in rh 2.4, vanilla 2.4, and 2.6 -# -AC_DEFUN([LN_FUNC_CPU_ONLINE], -[AC_MSG_CHECKING([if kernel defines cpu_online()]) -LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> -],[ - cpu_online(0); -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_TYPE_GFP_T -# -# check if gfp_t is typedef-ed -# -AC_DEFUN([LN_TYPE_GFP_T], -[AC_MSG_CHECKING([if kernel defines gfp_t]) -LB_LINUX_TRY_COMPILE([ - #include <linux/gfp.h> -],[ - return sizeof(gfp_t); -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_GFP_T, 1, [gfp_t found]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_TYPE_CPUMASK_T -# -# same goes for cpumask_t -# -AC_DEFUN([LN_TYPE_CPUMASK_T], -[AC_MSG_CHECKING([if kernel defines cpumask_t]) -LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> -],[ - return sizeof (cpumask_t); -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# 
LN_FUNC_SHOW_TASK -# -# we export show_task(), but not all kernels have it (yet) -# -AC_DEFUN([LN_FUNC_SHOW_TASK], -[LB_CHECK_SYMBOL_EXPORT([show_task], -[kernel/ksyms.c kernel/sched.c],[ -AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported]) -],[ -]) -]) - -# check userland __u64 type -AC_DEFUN([LN_U64_LONG_LONG], -[AC_MSG_CHECKING([u64 is long long type]) -tmp_flags="$CFLAGS" -CFLAGS="$CFLAGS -Werror" -AC_COMPILE_IFELSE([ - #include <linux/types.h> - int main(void) { - unsigned long long *data1; - __u64 *data2; - - data1 = data2; - return 0; - } -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_U64_LONG_LONG, 1, - [__u64 is long long type]) -],[ - AC_MSG_RESULT([no]) -]) -CFLAGS="$tmp_flags" -]) - -# check userland size_t type -AC_DEFUN([LN_SIZE_T_LONG], -[AC_MSG_CHECKING([size_t is unsigned long type]) -tmp_flags="$CFLAGS" -CFLAGS="$CFLAGS -Werror" -AC_COMPILE_IFELSE([ - #include <linux/types.h> - int main(void) { - unsigned long *data1; - size_t *data2; - - data1 = data2; - return 0; - } -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_SIZE_T_LONG, 1, - [size_t is long type]) -],[ - AC_MSG_RESULT([no]) -]) -CFLAGS="$tmp_flags" -]) - -AC_DEFUN([LN_SSIZE_T_LONG], -[AC_MSG_CHECKING([ssize_t is signed long type]) -tmp_flags="$CFLAGS" -CFLAGS="$CFLAGS -Werror" -AC_COMPILE_IFELSE([ - #include <linux/types.h> - int main(void) { - long *data1; - ssize_t *data2; - - data1 = data2; - return 0; - } -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_SSIZE_T_LONG, 1, - [ssize_t is long type]) -],[ - AC_MSG_RESULT([no]) -]) -CFLAGS="$tmp_flags" -]) - - -# LN_TASKLIST_LOCK -# 2.6.18 remove tasklist_lock export -AC_DEFUN([LN_TASKLIST_LOCK], -[LB_CHECK_SYMBOL_EXPORT([tasklist_lock], -[kernel/fork.c],[ -AC_DEFINE(HAVE_TASKLIST_LOCK, 1, - [tasklist_lock exported]) -],[ -]) -]) - -# 2.6.19 API changes -# kmem_cache_destroy(cachep) return void instead of -# int -AC_DEFUN([LN_KMEM_CACHE_DESTROY_INT], -[AC_MSG_CHECKING([kmem_cache_destroy(cachep) return int]) -LB_LINUX_TRY_COMPILE([ - #include 
<linux/slab.h> -],[ - int i = kmem_cache_destroy(NULL); -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT, 1, - [kmem_cache_destroy(cachep) return int]) -],[ - AC_MSG_RESULT(NO) -]) -]) - -# 2.6.19 API change -#panic_notifier_list use atomic_notifier operations -# -AC_DEFUN([LN_ATOMIC_PANIC_NOTIFIER], -[AC_MSG_CHECKING([panic_notifier_list is atomic]) -LB_LINUX_TRY_COMPILE([ - #include <linux/notifier.h> - #include <linux/kernel.h> -],[ - struct atomic_notifier_head panic_notifier_list; -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ATOMIC_PANIC_NOTIFIER, 1, - [panic_notifier_list is atomic_notifier_head]) -],[ - AC_MSG_RESULT(NO) -]) -]) - -# 2.6.20 API change INIT_WORK use 2 args and not -# store data inside -AC_DEFUN([LN_3ARGS_INIT_WORK], -[AC_MSG_CHECKING([check INIT_WORK want 3 args]) -LB_LINUX_TRY_COMPILE([ - #include <linux/workqueue.h> -],[ - struct work_struct work; - - INIT_WORK(&work, NULL, NULL); -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_3ARGS_INIT_WORK, 1, - [INIT_WORK use 3 args and store data inside]) -],[ - AC_MSG_RESULT(NO) -]) -]) - -# 2.6.21 api change. 'register_sysctl_table' use only one argument, -# instead of more old which need two. 
-AC_DEFUN([LN_2ARGS_REGISTER_SYSCTL], -[AC_MSG_CHECKING([check register_sysctl_table want 2 args]) -LB_LINUX_TRY_COMPILE([ - #include <linux/sysctl.h> -],[ - return register_sysctl_table(NULL,0); -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_REGISTER_SYSCTL, 1, - [register_sysctl_table want 2 args]) -],[ - AC_MSG_RESULT(NO) -]) -]) - -# 2.6.21 marks kmem_cache_t deprecated and uses struct kmem_cache -# instead -AC_DEFUN([LN_KMEM_CACHE], -[AC_MSG_CHECKING([check kernel has struct kmem_cache]) -tmp_flags="$EXTRA_KCFLAGS" -EXTRA_KCFLAGS="-Werror" -LB_LINUX_TRY_COMPILE([ - #include <linux/slab.h> - typedef struct kmem_cache cache_t; -],[ - cache_t *cachep = NULL; - - kmem_cache_alloc(cachep, 0); -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE, 1, - [kernel has struct kmem_cache]) -],[ - AC_MSG_RESULT(NO) -]) -EXTRA_KCFLAGS="$tmp_flags" -]) -# 2.6.23 lost dtor argument -AC_DEFUN([LN_KMEM_CACHE_CREATE_DTOR], -[AC_MSG_CHECKING([check kmem_cache_create has dtor argument]) -LB_LINUX_TRY_COMPILE([ - #include <linux/slab.h> -],[ - kmem_cache_create(NULL, 0, 0, 0, NULL, NULL); -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DTOR, 1, - [kmem_cache_create has dtor argument]) -],[ - AC_MSG_RESULT(NO) -]) -]) - -# -# LN_PROG_LINUX -# -# LNet linux kernel checks -# -AC_DEFUN([LN_PROG_LINUX], -[ -LN_FUNC_CPU_ONLINE -LN_TYPE_GFP_T -LN_TYPE_CPUMASK_T -LN_CONFIG_AFFINITY -LN_CONFIG_BACKOFF -LN_CONFIG_PANIC_DUMPLOG -LN_CONFIG_QUADRICS -LN_CONFIG_GM -LN_CONFIG_OPENIB -LN_CONFIG_CIB -LN_CONFIG_VIB -LN_CONFIG_IIB -LN_CONFIG_O2IB -LN_CONFIG_RALND -LN_CONFIG_PTLLND -LN_CONFIG_MX - -LN_STRUCT_PAGE_LIST -LN_STRUCT_SIGHAND -LN_FUNC_SHOW_TASK -LN_U64_LONG_LONG -LN_SSIZE_T_LONG -LN_SIZE_T_LONG -# 2.6.18 -LN_TASKLIST_LOCK -# 2.6.19 -LN_KMEM_CACHE_DESTROY_INT -LN_ATOMIC_PANIC_NOTIFIER -# 2.6.20 -LN_3ARGS_INIT_WORK -# 2.6.21 -LN_2ARGS_REGISTER_SYSCTL -LN_KMEM_CACHE -# 2.6.23 -LN_KMEM_CACHE_CREATE_DTOR -]) - -# -# LN_PROG_DARWIN -# -# Darwin checks -# -AC_DEFUN([LN_PROG_DARWIN], 
-[LB_DARWIN_CHECK_FUNCS([get_preemption_level]) -]) - -# -# LN_PATH_DEFAULTS -# -# default paths for installed files -# -AC_DEFUN([LN_PATH_DEFAULTS], -[ -]) - -# -# LN_CONFIGURE -# -# other configure checks -# -AC_DEFUN([LN_CONFIGURE], -[# lnet/utils/portals.c -AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h endian.h sys/ioctl.h]) -AC_CHECK_FUNCS([gethostbyname socket connect]) - -# lnet/utils/debug.c -AC_CHECK_HEADERS([linux/version.h]) - -AC_CHECK_TYPE([spinlock_t], - [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])], - [], - [#include <linux/spinlock.h>]) - -# lnet/utils/wirecheck.c -AC_CHECK_FUNCS([strnlen]) - -# -------- Check for required packages -------------- - -# -# LC_CONFIG_READLINE -# -# Build with readline -# -AC_MSG_CHECKING([whether to enable readline support]) -AC_ARG_ENABLE(readline, - AC_HELP_STRING([--disable-readline], - [disable readline support]), - [],[enable_readline='yes']) -AC_MSG_RESULT([$enable_readline]) - -# -------- check for readline if enabled ---- -if test x$enable_readline = xyes ; then - LIBS_save="$LIBS" - LIBS="-lncurses $LIBS" - AC_CHECK_LIB([readline],[readline],[ - LIBREADLINE="-lreadline -lncurses" - AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available]) - ],[ - LIBREADLINE="" - ]) - LIBS="$LIBS_save" -else - LIBREADLINE="" -fi -AC_SUBST(LIBREADLINE) - -AC_MSG_CHECKING([if efence debugging support is requested]) -AC_ARG_ENABLE(efence, - AC_HELP_STRING([--enable-efence], - [use efence library]), - [],[enable_efence='no']) -AC_MSG_RESULT([$enable_efence]) -if test "$enable_efence" = "yes" ; then - LIBEFENCE="-lefence" - AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested]) -else - LIBEFENCE="" -fi -AC_SUBST(LIBEFENCE) - -# -------- enable acceptor libwrap (TCP wrappers) support? 
------- -AC_MSG_CHECKING([if libwrap support is requested]) -AC_ARG_ENABLE([libwrap], - AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]), - [case "${enableval}" in - yes) enable_libwrap=yes ;; - no) enable_libwrap=no ;; - *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;; - esac],[enable_libwrap=no]) -AC_MSG_RESULT([$enable_libwrap]) -if test x$enable_libwrap = xyes ; then - LIBWRAP="-lwrap" - AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested]) -else - LIBWRAP="" -fi -AC_SUBST(LIBWRAP) - -# -------- check for -lpthread support ---- -AC_MSG_CHECKING([whether to use libpthread for lnet library]) -AC_ARG_ENABLE([libpthread], - AC_HELP_STRING([--disable-libpthread], - [disable libpthread]), - [],[enable_libpthread=yes]) -if test "$enable_libpthread" = "yes" ; then - AC_CHECK_LIB([pthread], [pthread_create], - [ENABLE_LIBPTHREAD="yes"], - [ENABLE_LIBPTHREAD="no"]) - if test "$ENABLE_LIBPTHREAD" = "yes" ; then - AC_MSG_RESULT([$ENABLE_LIBPTHREAD]) - PTHREAD_LIBS="-lpthread" - AC_DEFINE([HAVE_LIBPTHREAD], 1, [use libpthread]) - else - PTHREAD_LIBS="" - AC_MSG_RESULT([no libpthread is found]) - fi - AC_SUBST(PTHREAD_LIBS) -else - AC_MSG_RESULT([no (disabled explicitly)]) - ENABLE_LIBPTHREAD="no" -fi -AC_SUBST(ENABLE_LIBPTHREAD) - -# ---------------------------------------- -# some tests for catamount-like systems -# ---------------------------------------- -AC_ARG_ENABLE([sysio_init], - AC_HELP_STRING([--disable-sysio-init], - [call sysio init functions when initializing liblustre]), - [],[enable_sysio_init=yes]) -AC_MSG_CHECKING([whether to initialize libsysio]) -AC_MSG_RESULT([$enable_sysio_init]) -if test x$enable_sysio_init != xno ; then - AC_DEFINE([INIT_SYSIO], 1, [call sysio init functions]) -fi - -AC_ARG_ENABLE([urandom], - AC_HELP_STRING([--disable-urandom], - [disable use of /dev/urandom for liblustre]), - [],[enable_urandom=yes]) -AC_MSG_CHECKING([whether to use /dev/urandom for liblustre]) -AC_MSG_RESULT([$enable_urandom]) -if test 
x$enable_urandom != xno ; then - AC_DEFINE([LIBLUSTRE_USE_URANDOM], 1, [use /dev/urandom for random data]) -fi - -# -------- check for -lcap support ---- -if test x$enable_liblustre = xyes ; then - AC_CHECK_LIB([cap], [cap_get_proc], - [ - CAP_LIBS="-lcap" - AC_DEFINE([HAVE_LIBCAP], 1, [use libcap]) - ], - [ - CAP_LIBS="" - ]) - AC_SUBST(CAP_LIBS) - -fi - -LN_CONFIG_MAX_PAYLOAD -LN_CONFIG_UPTLLND -LN_CONFIG_USOCKLND -]) - -# -# LN_CONDITIONALS -# -# AM_CONDITOINAL defines for lnet -# -AC_DEFUN([LN_CONDITIONALS], -[AM_CONDITIONAL(BUILD_QSWLND, test x$QSWLND = "xqswlnd") -AM_CONDITIONAL(BUILD_GMLND, test x$GMLND = "xgmlnd") -AM_CONDITIONAL(BUILD_MXLND, test x$MXLND = "xmxlnd") -AM_CONDITIONAL(BUILD_O2IBLND, test x$O2IBLND = "xo2iblnd") -AM_CONDITIONAL(BUILD_OPENIBLND, test x$OPENIBLND = "xopeniblnd") -AM_CONDITIONAL(BUILD_CIBLND, test x$CIBLND = "xciblnd") -AM_CONDITIONAL(BUILD_IIBLND, test x$IIBLND = "xiiblnd") -AM_CONDITIONAL(BUILD_VIBLND, test x$VIBLND = "xviblnd") -AM_CONDITIONAL(BUILD_RALND, test x$RALND = "xralnd") -AM_CONDITIONAL(BUILD_PTLLND, test x$PTLLND = "xptllnd") -AM_CONDITIONAL(BUILD_UPTLLND, test x$UPTLLND = "xptllnd") -AM_CONDITIONAL(BUILD_USOCKLND, test x$USOCKLND = "xusocklnd") -]) - -# -# LN_CONFIG_FILES -# -# files that should be generated with AC_OUTPUT -# -AC_DEFUN([LN_CONFIG_FILES], -[AC_CONFIG_FILES([ -lnet/Kernelenv -lnet/Makefile -lnet/autoMakefile -lnet/autoconf/Makefile -lnet/doc/Makefile -lnet/include/Makefile -lnet/include/libcfs/Makefile -lnet/include/libcfs/linux/Makefile -lnet/include/lnet/Makefile -lnet/include/lnet/linux/Makefile -lnet/klnds/Makefile -lnet/klnds/autoMakefile -lnet/klnds/gmlnd/Makefile -lnet/klnds/mxlnd/autoMakefile -lnet/klnds/mxlnd/Makefile -lnet/klnds/gmlnd/autoMakefile -lnet/klnds/openiblnd/Makefile -lnet/klnds/openiblnd/autoMakefile -lnet/klnds/o2iblnd/Makefile -lnet/klnds/o2iblnd/autoMakefile -lnet/klnds/ciblnd/Makefile -lnet/klnds/ciblnd/autoMakefile -lnet/klnds/iiblnd/Makefile -lnet/klnds/iiblnd/autoMakefile 
-lnet/klnds/viblnd/Makefile -lnet/klnds/viblnd/autoMakefile -lnet/klnds/qswlnd/Makefile -lnet/klnds/qswlnd/autoMakefile -lnet/klnds/ralnd/Makefile -lnet/klnds/ralnd/autoMakefile -lnet/klnds/socklnd/Makefile -lnet/klnds/socklnd/autoMakefile -lnet/klnds/ptllnd/Makefile -lnet/klnds/ptllnd/autoMakefile -lnet/libcfs/Makefile -lnet/libcfs/autoMakefile -lnet/libcfs/linux/Makefile -lnet/lnet/Makefile -lnet/lnet/autoMakefile -lnet/selftest/Makefile -lnet/selftest/autoMakefile -lnet/ulnds/Makefile -lnet/ulnds/autoMakefile -lnet/ulnds/socklnd/Makefile -lnet/ulnds/ptllnd/Makefile -lnet/utils/Makefile -]) -case $lb_target_os in - darwin) - AC_CONFIG_FILES([ -lnet/include/libcfs/darwin/Makefile -lnet/include/lnet/darwin/Makefile -lnet/libcfs/darwin/Makefile -]) - ;; -esac -]) diff --git a/lnet/doc/.cvsignore b/lnet/doc/.cvsignore deleted file mode 100644 index 827dca41301e5e078edf5fcef01e7cbc0b081b10..0000000000000000000000000000000000000000 --- a/lnet/doc/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -Makefile -Makefile.in -*.eps -*.pdf diff --git a/lnet/doc/Data-structures b/lnet/doc/Data-structures deleted file mode 100644 index b5532b1b8d1b96c06f385d0bf218df6a38165726..0000000000000000000000000000000000000000 --- a/lnet/doc/Data-structures +++ /dev/null @@ -1,65 +0,0 @@ -In this document I will try to draw the data structures and how they -interrelate in the Portals 3 reference implementation. It is probably -best shown with a drawing, so there may be an additional xfig or -Postscript figure. - - -MEMORY POOLS: ------------- - -First, a digression on memory allocation in the library. As mentioned -in the NAL Writer's Guide, the library does not link against any -standard C libraries and as such is unable to dynamically allocate -memory on its own. It requires that the NAL implement a method -for allocation that is appropriate for the protection domain in -which the library lives. This is only called when a network -interface is initialized to allocate the Portals object pools. 
- -These pools are preallocated blocks of objects that the library -can rapidly make active and manage with a minimum of overhead. -It also cuts down on overhead for setting up structures -since the NAL->malloc() callback does not need to be called -for each object. - -The objects are maintained on a per-object type singly linked free -list and contain a pointer to the next free object. This pointer -is NULL if the object is not on the free list and is non-zero -if it is on the list. The special sentinel value of 0xDEADBEEF -is used to mark the end of the free list since NULL could -indicate that the last object in the list is not free. - -When one of the lib_*_alloc() functions is called, the library -returns the head of the free list and advances the head pointer -to the next item on the list. The special case of 0xDEADBEEF is -checked and a NULL pointer is returned if there are no more -objects of this type available. The lib_*_free() functions -are even simpler -- check to ensure that the object is not already -free, set its next pointer to the current head and then set -the head to be this newly freed object. - -Since C does not have templates, I did the next best thing and wrote -the memory pool allocation code as a macro that expands based on the -type of the argument. The mk_alloc(T) macro expands to -write the _lib_T_alloc() and lib_T_free() functions. -It requires that the object have a pointer of the type T named -"next_free". There are also functions that map _lib_T_alloc() -to lib_T_alloc() so that the library can add some extra -functionality to the T constructor. - - - -LINKED LISTS: ------------- - -Many of the active Portals objects are stored in doubly linked lists -when they are active. These are always implemented with the pointer -to the next object and a pointer to the next pointer of the -previous object. This avoids the "dummy head" object or -special cases for inserting at the beginning or end of the list. 
-The pointer manipulations are a little hairy at times, but -I hope that they are understandable. - -The actual linked list code is implemented as macros in <lib-p30.h>, -although the object has to know about - - diff --git a/lnet/doc/Makefile.am b/lnet/doc/Makefile.am deleted file mode 100644 index b7f6252f34a539174f2cfe60a36336442e84b1b3..0000000000000000000000000000000000000000 --- a/lnet/doc/Makefile.am +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -LYX2PDF = lyx --export pdf -LYX2TXT = lyx --export text -LYX2HTML = lyx --export html -SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps - -if DOC - DOCS = portals3.pdf -else - DOCS = -endif - -IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps -LYXFILES= portals3.lyx - -MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED) -GENERATED = -EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES) - -all: $(DOCS) - -# update date and version in document -date := $(shell date +%x) -tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/') -addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g' - -# Regenerate when the $(VERSION) or $Name: $ changes. 
-.INTERMEDIATE: $(GENERATED) -$(GENERATED) : %.lyx: %.lin Makefile - $(addversion) $< > $@ - -.lyx.pdf: - @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n" - -.lyx.txt: - @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n" -.lyx.html: - @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n" -.fig.eps: - -fig2dev -L eps $< > $@ - -portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx - -syncweb: portals3.pdf -# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf -# ( cd /usr/src/www ; make lustre ; make synclustre ) - diff --git a/lnet/doc/Message-life-cycle b/lnet/doc/Message-life-cycle deleted file mode 100644 index e8cc7e21b2276de5e9caed367cd9d83ce6a85955..0000000000000000000000000000000000000000 --- a/lnet/doc/Message-life-cycle +++ /dev/null @@ -1,118 +0,0 @@ -This documents the life cycle of message as it arrives and is handled by -a basic async, packetized NAL. There are four types of messages that have -slightly different life cycles, so they are addressed independently. - - -Put request ------------ - -1. NAL notices that there is a incoming message header on the network -and reads an ptl_hdr_t in from the wire. - -2. It may store additional NAL specific data that provides context -for this event in a void* that it will interpret in some fashion -later. - -3. The NAL calls lib_parse() with a pointer to the header and its -private data structure. - -4. The library decodes the header and may build a message state -object that describes the event to be written and the ACK to be -sent, if any. It then calls nal->recv() with the private data -that the NAL passed in, a pointer to the message state object -and a translated user address. - - The NAL will have been given a chance to pretranslate - all user addresses when the buffers are created. This - process is described in the NAL-HOWTO. - -5. 
The NAL should restore whatever context it required from the -private data pointer, begin receiving the bytes and possibly store -some extra state of its own. It should return at this point. - - - -Get request ------------ - -1. As with a Put, the NAL notices the incoming message header and -passes it to lib_parse(). - -2. The library decodes the header and calls nal->recv() with a -zero byte length, offset and destination to instruct it to clean -up the wire after reading the header. The private data will -be passed in as well, allowing the NAL to retrieve any state -or context that it requires. - -3. The library may build a message state object to possibly -write an event log or invalidate a memory region. - -4. The library will build a ptl_msg_t header that specifies the -Portals protocol information for delivery at the remote end. - -5. The library calls nal->send() with the pre-built header, -the optional message state object, the four part address -component, a translated user pointer + offset, and some -other things. - -6. The NAL is to put the header on the wire or copy it at -this point (since it is off the stack). It should store some -amount of state about its current position in the message and -the destination address. - -7. And then return to the library. - - -Reply request -------------- - -1. Starting at "The library decodes the header..." - -2. The library decodes the header and calls nal->recv() -to bring in the rest of the message. Flow continues in -exactly the same fashion as with all other receives. - - -Ack request ------------ - -1. The library decodes the header, builds the appropriate data -structures for the event in a message state object and calls nal->recv() -with a zero byte length, etc. - - -Packet arrival --------------- - -1. 
The NAL should notice the arrival of a packet, retrieve whatever -state it needs from the message ID or other NAL specific header data -and place the data bytes directly into the user address that was -given to nal->recv(). - - How this happens is outside the scope of the Portals library - and solely determined by the NAL... - -2. If this is the last packet in a message, the NAL should retrieve -the lib_msg_t *cookie that it was given in the call to nal->recv() -and pass it to lib_finalize(). lib_finalize() may call nal->send() -to send an ACK, nal->write() to record an entry in the event log, -nal->invalidate() to unregister a region of memory or do nothing at all. - -3. It should then clean up any remaining NAL specific state about -the message and go back into the main loop. - - -Outgoing packets ----------------- - -1. When the NAL has pending output, it should put the packets on -the wire wrapped with whatever implementation specified wrappers. - -2. Once it has output all the packets of a message it should -call lib_finalize() with the message state object that was -handed to nal->send(). This will allow the library to clean -up its state regarding the message and write any pending event -entries. - - - diff --git a/lnet/doc/NAL-HOWTO b/lnet/doc/NAL-HOWTO deleted file mode 100644 index ea38aed09e195452b6696ed73f133e247073a1af..0000000000000000000000000000000000000000 --- a/lnet/doc/NAL-HOWTO +++ /dev/null @@ -1,293 +0,0 @@ -This document is a first attempt at describing how to write a NAL -for the Portals 3 library. It also defines the library architecture -and the abstraction of protection domains. 
- - -First, an overview of the architecture: - - Application - -----|----+-------- - | - API === NAL (User space) - | ----------+---|----- - | - LIB === NAL (Library space) - | ----------+---|----- - - Physical wire (NIC space) - - -Application - API -API-side NAL ------------- -LIB-side NAL - LIB -LIB-side NAL - wire - -Communication is through the indicated paths via well defined -interfaces. The API and LIB portions are written to be portable -across platforms and do not depend on the network interface. - -Communication between the application and the API code is -defined in the Portals 3 API specification. This is the -user-visible portion of the interface and should be the most -stable. - - - -API-side NAL: ------------- - -The user space NAL needs to implement only a few functions -that are stored in a nal_t data structure and called by the -API-side library: - - int forward( nal_t *nal, - int index, - void *args, - size_t arg_len, - void *ret, - size_t ret_len - ); - -Most of the data structures in the portals library are held in -the LIB section of the code, so it is necessary to forward API -calls across the protection domain to the library. This is -handled by the NAL's forward method. Once the argument and return -blocks are on the remote side the NAL should call lib_dispatch() -to invoke the appropriate API function. - - int validate( nal_t *nal, - void *base, - size_t extent, - void **trans_base, - void **trans_data - ); - -The validate method provides a means for the NAL to prevalidate -and possibly pretranslate user addresses into a form suitable -for fast use by the network card or kernel module. The trans_base -pointer will be used by the library every time it needs to -refer to the block of memory. The trans_data result is a -cookie that will be handed to the NAL along with the trans_base. - -The library never performs calculations on the trans_base value; -it only computes offsets that are then handed to the NAL. 
- - - int shutdown( nal_t *nal, int interface ); - -Brings down the network interface. The remote NAL side should -call lib_fini() to bring down the library side of the network. - - void yield( nal_t *nal ); - -This allows the user application to gracefully give up the processor -while busy waiting. Performance critical applications may not -want to take the time to call this function, so it should be an -option to the PtlEQWait call. Right now it is not implemented as such. - -Lastly, the NAL must implement a function named PTL_IFACE_*, where -* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR. -This initialization function is to set up communication with the -library-side NAL, which should call lib_init() to bring up the -network interface. - - - -LIB-side NAL: ------------- - -On the library-side, the NAL has much more responsibility. It -is responsible for calling lib_dispatch() on behalf of the user, -it is also responsible for bringing packets off the wire and -pushing bits out. As on the user side, the methods are stored -in a nal_cb_t structure that is defined on a per network -interface basis. - -The calls to lib_dispatch() need to be examined. The prototype: - - void lib_dispatch( - nal_cb_t *nal, - void *private, - int index, - void *arg_block, - void *ret_block - ); - -has two complications. The private field is a NAL-specific -value that will be passed to any callbacks produced as a result -of this API call. Kernel module implementations may use this -for task structures, or perhaps network card data. It is ignored -by the library. - -Secondly, the arg_block and ret_block must be in the same protection -domain as the library. The NAL's two halves must communicate the -sizes and perform the copies. After the call, the buffer pointed -to by ret_block will be filled in and should be copied back to -the user space. How this is to be done is NAL specific. 
- - int lib_parse( - nal_cb_t *nal, - ptl_hdr_t *hdr, - void *private - ); - -This is the only other entry point into the library from the NAL. -When the NAL detects an incoming message on the wire it should read -sizeof(ptl_hdr_t) bytes and pass a pointer to the header to -lib_parse(). It may set private to be anything that it needs to -tie the incoming message to callbacks that are made as a result -of this event. - -The method calls are: - - int (*send)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int nid, - int pid, - int gid, - int rid, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t len - ); - -This is a tricky function -- it must support async output -of messages as well as properly synchronized event log writing. -The private field is the same that was passed into lib_dispatch() -or lib_parse() and may be used to tie this call to the event -that initiated the entry to the library. - -The cookie is a pointer to a library private value that must -be passed to lib_finalize() once the message has been completely -sent. It should not be examined by the NAL for any meaning. - -The four ID fields are passed in, although some implementations -may not use all of them. - -The single base pointer has been replaced with the translated -address that the API NAL generated in the api_nal->validate() -call. The trans_data is unchanged and the offset is in bytes. - - - int (*recv)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t mlen, - size_t rlen - ); - -This callback will only be called in response to lib_parse(). -The cookie, trans_base and trans_data are as discussed in send(). -The NAL should read mlen bytes from the wire, deposit them into -trans_base + offset and then discard (rlen - mlen) bytes. -Once the entire message has been received the NAL should call -lib_finalize() with the lib_msg_t *cookie. 
- -The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0 -are used to indicate that the NAL should clean up the wire. This could -be implemented as a blocking call, although having it return as quickly -as possible is desirable. - - int (*write)( - nal_cb_t *nal, - void *private, - user_ptr trans_addr, - user_ptr trans_data, - size_t offset, - - void *src_addr, - size_t len - ); - -This is essentially a cross-protection domain memcpy(). The user address -has been pretranslated by the api_nal->validate() call. - - void *(*malloc)( - nal_cb_t *nal, - size_t len - ); - - void (*free)( - nal_cb_t *nal, - void *buf - ); - -Since the NAL may be in a non-standard hosted environment it can -not call malloc(). This allows the library side NAL to implement -the system specific malloc(). In the current reference implementation -the library only calls nal->malloc() when the network interface is -initialized and then calls free when it is brought down. The library -maintains its own pool of objects for allocation so only one call to -malloc is made per object type. - - void (*invalidate)( - nal_cb_t *nal, - user_ptr trans_base, - user_ptr trans_data, - size_t extent - ); - -User addresses are validated/translated at the user-level API NAL -method, which is likely to push them to this level. Meanwhile, -the library NAL will be notified when the library no longer -needs the buffer. Overlapped buffers are not detected by the -library, so the NAL should ref count each page involved. - -Unfortunately we have a few bugs when the invalidate method is -called. It is still in progress... - - void (*printf)( - nal_cb_t *nal, - const char *fmt, - ... - ); - -As with malloc(), the library does not have any way to do printf -or printk. It is not necessary for the NAL to implement this -call, although omitting it will make debugging difficult. 
- - void (*cli)( - nal_cb_t *nal, - unsigned long *flags - ); - - void (*sti)( - nal_cb_t *nal, - unsigned long *flags - ); - -These are used by the library to mark critical sections. - - int (*gidrid2nidpid)( - nal_cb_t *nal, - ptl_id_t gid, - ptl_id_t rid, - ptl_id_t *nid, - ptl_id_t *pid - ); - - - int (*nidpid2gidrid)( - nal_cb_t *nal, - ptl_id_t nid, - ptl_id_t pid, - ptl_id_t *gid, - ptl_id_t *rid - ); - -Rolf added these. I haven't looked at how they have to work yet. diff --git a/lnet/doc/file.fig b/lnet/doc/file.fig deleted file mode 100644 index 914c2941d921c4e1106d2a88a149a95e99d8af14..0000000000000000000000000000000000000000 --- a/lnet/doc/file.fig +++ /dev/null @@ -1,111 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1200 750 1650 1050 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1050 1650 750 1200 750 1200 1050 1650 1050 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001 --6 -6 1200 2325 1650 2625 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001 --6 -6 1200 1800 1650 2100 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001 --6 -6 1200 1275 1650 1575 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001 --6 -6 450 750 900 1200 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 825 450 1050 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1050 900 825 --6 -6 450 2325 900 2775 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 2400 450 2625 -2 1 0 1 0 7 100 
0 20 0.000 0 0 -1 0 0 2 - 900 2625 900 2400 --6 -6 450 1800 900 2250 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 1875 450 2100 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2100 900 1875 --6 -6 450 1275 900 1725 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 1350 450 1575 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1575 900 1350 --6 -6 2250 750 3450 2625 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1200 3150 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1500 3150 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1800 3150 1800 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2100 3150 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 975 3150 975 3150 2625 2550 2625 2550 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2400 3150 2400 -4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2400 2550 1350 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1875 2550 1050 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1425 2550 1950 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 900 2550 1650 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 900 1200 900 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1425 1200 1425 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1950 1200 1950 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2475 1200 2475 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2025 2550 2250 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 
60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2550 2550 2475 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1875 2850 1875 600 225 600 225 2850 1875 2850 -4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001 diff --git a/lnet/doc/flow_new.fig b/lnet/doc/flow_new.fig deleted file mode 100644 index d828dea8b7b318fb89d6fb9ff3095b01509dd568..0000000000000000000000000000000000000000 --- a/lnet/doc/flow_new.fig +++ /dev/null @@ -1,213 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 525 2175 1575 2925 -6 675 2287 1425 2812 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001 -4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 2550 1050 2175 525 2550 1050 2925 1575 2550 --6 -6 3450 1275 4350 1725 -6 3600 1312 4200 1687 -4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001 -4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275 --6 -6 4650 1275 5550 1725 -6 4725 1312 5475 1687 -4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001 -4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275 --6 -6 1350 525 2250 975 -6 1350 562 2250 937 -4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001 -4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 525 2250 525 2250 975 1350 975 1350 525 --6 -6 525 1125 1575 1875 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 1500 1050 1125 525 1500 1050 1875 1575 1500 -4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001 --6 -6 2340 1237 2940 1687 -6 2340 1237 2940 1687 -4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001 -4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001 -4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 
Entries?\001 --6 --6 -6 525 3225 1575 3975 -6 675 3375 1425 3750 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001 --6 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 525 3600 1050 3225 1575 3600 1050 3975 525 3600 --6 -6 3300 3375 4350 3825 -6 3300 3412 4350 3787 -4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375 --6 -6 1950 3225 3000 3975 -6 2250 3450 2700 3750 -4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600 --6 -6 3150 4500 4200 4950 -6 3150 4537 4200 4912 -4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500 --6 -6 600 4500 1500 4950 -6 675 4537 1425 4912 -4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001 -4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 600 4500 1500 4500 1500 4950 600 4950 600 4500 --6 -6 4650 4350 5700 5100 -6 4950 4537 5400 4912 -6 4950 4537 5400 4912 -4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001 -4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001 --6 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725 --6 -6 6000 4500 6900 4950 -6 6225 4575 6675 4875 -4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001 -4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500 --6 -6 1800 4350 2850 5100 -6 2100 4575 2550 4875 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 
thresh?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 1875 1050 2175 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1575 1500 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 450 1050 1125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1350 750 1050 750 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 2925 1050 3225 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3150 1500 3450 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4350 1500 4650 1500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1575 3600 1950 3600 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 3975 1050 4500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 3600 3300 3600 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 4725 1800 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 5700 4725 6000 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2850 4725 3150 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4200 4725 4650 4725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 6900 4725 7950 4725 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 2250 750 2475 750 2625 750 2625 900 2625 1125 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 2475 3225 
2475 2400 2475 2250 2325 2250 1800 2250 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 60.00 120.00 - 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125 - 4425 4275 4425 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 60.00 120.00 - 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125 - 7275 4275 7275 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001 -4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001 diff --git a/lnet/doc/get.fig b/lnet/doc/get.fig deleted file mode 100644 index 28db949a47fb2e7fa45a1f74087c6e58c25ed9a6..0000000000000000000000000000000000000000 --- a/lnet/doc/get.fig +++ /dev/null @@ -1,33 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 2775 900 3525 1200 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001 -4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001 --6 -6 1350 1725 2175 2025 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 
Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 750 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 825 2700 1275 -2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1350 900 1950 -2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 diff --git a/lnet/doc/ieee.bst b/lnet/doc/ieee.bst deleted file mode 100644 index 4df7c50b7b30c4728257449ff99eef57a2ebb184..0000000000000000000000000000000000000000 --- a/lnet/doc/ieee.bst +++ /dev/null @@ -1,1112 +0,0 @@ -% --------------------------------------------------------------- -% -% by Paolo.Ienne@di.epfl.ch -% -% --------------------------------------------------------------- -% -% no guarantee is given that the format corresponds perfectly to -% IEEE 8.5" x 11" Proceedings, but most features should be ok. -% -% --------------------------------------------------------------- -% -% `ieee' from BibTeX standard bibliography style `abbrv' -% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. -% Copyright (C) 1985, all rights reserved. -% Copying of this file is authorized only if either -% (1) you make absolutely no changes to your copy, including name, or -% (2) if you do make changes, you name it something other than -% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. -% This restriction helps ensure that all standard styles are identical. -% The file btxbst.doc has the documentation for this style. 
- -ENTRY - { address - author - booktitle - chapter - edition - editor - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - volume - year - } - {} - { label } - -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} - -STRINGS { s t } - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} - -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} - -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} - -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} - -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {field.or.null} -{ duplicate$ empty$ 
- { pop$ "" } - 'skip$ - if$ -} - -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "{\em " swap$ * "}" * } - if$ -} - -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 's := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := - nameptr #1 > - { namesleft #1 > - { ", " * t * } - { numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {format.authors} -{ author empty$ - { "" } - { author format.names } - if$ -} - -FUNCTION {format.editors} -{ editor empty$ - { "" } - { editor format.names - editor num.names$ #1 > - { ", editors" * } - { ", editor" * } - if$ - } - if$ -} - -FUNCTION {format.title} -{ title empty$ - { "" } - { title "t" change.case$ } - if$ -} - -FUNCTION {n.dashify} -{ 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {format.date} -{ year empty$ - { month empty$ - { "" } - { "there's a month but no year in " cite$ * warning$ - month - } - if$ - } - { month empty$ - 'year - { month " " * year * } - if$ - } - if$ -} - -FUNCTION {format.btitle} -{ title emphasize -} - -FUNCTION {tie.or.space.connect} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ * * -} - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} - -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { "volume" volume tie.or.space.connect - series empty$ - 'skip$ - { " of " * series emphasize * } - if$ - "volume and number" number 
either.or.check - } - if$ -} - -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { "number" } - { "Number" } - if$ - number tie.or.space.connect - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { " in " * series * } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition empty$ - { "" } - { output.state mid.sentence = - { edition "l" change.case$ " edition" * } - { edition "t" change.case$ " edition" * } - if$ - } - if$ -} - -INTEGERS { multiresult } - -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages empty$ - { "" } - { pages multi.page.check - { "pages" pages n.dashify tie.or.space.connect } - { "page" pages tie.or.space.connect } - if$ - } - if$ -} - -FUNCTION {format.vol.num.pages} -{ volume field.or.null - number empty$ - 'skip$ - { "(" number * ")" * * - volume empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - } - if$ - pages empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.pages } - { ":" * pages n.dashify * } - if$ - } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { "chapter" } - { type "l" change.case$ } - if$ - chapter tie.or.space.connect - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.in.ed.booktitle} -{ booktitle empty$ - { "" } - { editor empty$ - { "In " booktitle emphasize * } - { "In " format.editors * ", " * booktitle emphasize * } - if$ - } - if$ -} - -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - key empty$ not and - { 
"all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} - -FUNCTION {format.thesis.type} -{ type empty$ - 'skip$ - { pop$ - type "t" change.case$ - } - if$ -} - -FUNCTION {format.tr.number} -{ type empty$ - { "Technical Report" } - 'type - if$ - number empty$ - { "t" change.case$ } - { number tie.or.space.connect } - if$ -} - -FUNCTION {format.article.crossref} -{ key empty$ - { journal empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * - warning$ - "" - } - { "In {\em " journal * "\/}" * } - if$ - } - { "In " key * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - editor num.names$ duplicate$ - #2 > - { pop$ " et~al." * } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { " et~al." * } - { " and " * editor #2 "{vv~}{ll}" format.name$ * } - if$ - } - if$ - } - if$ -} - -FUNCTION {format.book.crossref} -{ volume empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - "In " - } - { "Volume" volume tie.or.space.connect - " of " * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { "{\em " * series * "\/}" * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.incoll.inproc.crossref} -{ editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - "" - } - { "In {\em " booktitle * "\/}" * } - if$ - } - { "In " key * } - if$ - } - { "In " format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref 
missing$ - { journal emphasize "journal" output.check - format.vol.num.pages output - format.date "year" output.check - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {booklet} -{ output.bibitem - format.authors output - new.block - format.title "title" output.check - howpublished address new.block.checkb - howpublished output - address output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { 
format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - publisher "publisher" output.check - address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.pages output - address empty$ - { organization publisher new.sentence.checkb - organization output - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - organization output - publisher output - } - if$ - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {conference} { inproceedings } - -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization empty$ - 'skip$ - { organization output.nonnull - address output - } - if$ - } - { format.authors output.nonnull } - if$ - new.block - format.btitle "title" output.check - author empty$ - { organization empty$ - { address new.block.checka - address output - } - 'skip$ - if$ - } - { organization address new.block.checkb - organization output - address output - } - if$ - format.edition output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - "Master's thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {misc} -{ 
output.bibitem - format.authors output - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished output - format.date output - new.block - note output - fin.entry - empty.misc.check -} - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.btitle "title" output.check - new.block - "PhD thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization output } - { format.editors output.nonnull } - if$ - new.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - address empty$ - { editor empty$ - { publisher new.sentence.checka } - { organization publisher new.sentence.checkb - organization output - } - if$ - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - editor empty$ - 'skip$ - { organization output } - if$ - publisher output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - note "note" output.check - format.date output - fin.entry -} - -FUNCTION {default.type} { misc } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sept."} - -MACRO {oct} {"Oct."} - -MACRO {nov} 
{"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Comput. Surv."} - -MACRO {acta} {"Acta Inf."} - -MACRO {cacm} {"Commun. ACM"} - -MACRO {ibmjrd} {"IBM J. Res. Dev."} - -MACRO {ibmsj} {"IBM Syst.~J."} - -MACRO {ieeese} {"IEEE Trans. Softw. Eng."} - -MACRO {ieeetc} {"IEEE Trans. Comput."} - -MACRO {ieeetcad} - {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} - -MACRO {ipl} {"Inf. Process. Lett."} - -MACRO {jacm} {"J.~ACM"} - -MACRO {jcss} {"J.~Comput. Syst. Sci."} - -MACRO {scp} {"Sci. Comput. Programming"} - -MACRO {sicomp} {"SIAM J. Comput."} - -MACRO {tocs} {"ACM Trans. Comput. Syst."} - -MACRO {tods} {"ACM Trans. Database Syst."} - -MACRO {tog} {"ACM Trans. Gr."} - -MACRO {toms} {"ACM Trans. Math. Softw."} - -MACRO {toois} {"ACM Trans. Office Inf. Syst."} - -MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} - -MACRO {tcs} {"Theoretical Comput. Sci."} - -READ - -FUNCTION {sortify} -{ purify$ - "l" change.case$ -} - -INTEGERS { len } - -FUNCTION {chop.word} -{ 's := - 'len := - s #1 len substring$ = - { s len #1 + global.max$ substring$ } - 's - if$ -} - -FUNCTION {sort.format.names} -{ 's := - #1 'nameptr := - "" - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { nameptr #1 > - { " " * } - 'skip$ - if$ - s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := - nameptr numnames = t "others" = and - { "et al" * } - { t sortify * } - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {sort.format.title} -{ 't := - "A " #2 - "An " #3 - "The " #4 t chop.word - chop.word - chop.word - sortify - #1 global.max$ substring$ -} - -FUNCTION {author.sort} -{ author empty$ - { key empty$ - { "to sort, need author or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.editor.sort} -{ author empty$ - { editor empty$ - { key empty$ - { "to sort, need author, editor, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ 
- } - { editor sort.format.names } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.organization.sort} -{ author empty$ - { organization empty$ - { key empty$ - { "to sort, need author, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {editor.organization.sort} -{ editor empty$ - { organization empty$ - { key empty$ - { "to sort, need editor, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { editor sort.format.names } - if$ -} - -FUNCTION {presort} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.sort - { type$ "proceedings" = - 'editor.organization.sort - { type$ "manual" = - 'author.organization.sort - 'author.sort - if$ - } - if$ - } - if$ - " " - * - year field.or.null sortify - * - " " - * - title field.or.null - sort.format.title - * - #1 entry.max$ substring$ - 'sort.key$ := -} - -ITERATE {presort} - -SORT - -STRINGS { longest.label } - -INTEGERS { number.label longest.label.width } - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} - -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} - -EXECUTE {initialize.longest.label} - -ITERATE {longest.label.pass} - -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * - "}\setlength{\itemsep}{-1ex}\small" * write$ newline$ -} - -EXECUTE {begin.bib} - -EXECUTE {init.state.consts} - -ITERATE {call.type$} - -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} - -EXECUTE {end.bib} - -% end of file ieee.bst -% 
--------------------------------------------------------------- diff --git a/lnet/doc/mpi.fig b/lnet/doc/mpi.fig deleted file mode 100644 index e1a91b5930d3cb193a8b05b9c04ad6beb74dc3db..0000000000000000000000000000000000000000 --- a/lnet/doc/mpi.fig +++ /dev/null @@ -1,117 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 150 1650 900 2025 -4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001 -4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001 --6 -6 150 150 900 525 -4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001 -4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001 --6 -6 2550 4125 3150 4725 -4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001 -4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001 -4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001 --6 -6 1050 1575 1950 1875 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001 --6 -6 5400 1575 6300 2175 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001 --6 -6 5400 2400 6300 3000 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001 --6 -6 1050 2400 1950 2700 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001 --6 -6 1050 825 1950 1125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 825 1950 825 1950 1125 1050 1125 1050 825 -4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1575 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2025 4050 3375 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 150 675 6600 675 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 
150 1350 6600 1350 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 4500 4050 3675 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 1725 5400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2550 5400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2850 4050 3450 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1800 1500 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 825 3300 825 3300 1275 2400 1275 2400 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 2625 1500 4125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 300 1500 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 975 2400 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 1725 2400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 2550 2400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 4275 2400 4275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300 -4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001 -4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001 -4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001 -4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001 -4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001 -4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 
max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001 -4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001 diff --git a/lnet/doc/portals.fig b/lnet/doc/portals.fig deleted file mode 100644 index 9b1271bd65ed6f24e4de38243b250b2381028d78..0000000000000000000000000000000000000000 --- a/lnet/doc/portals.fig +++ /dev/null @@ -1,68 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 900 1650 900 1650 1200 1350 1200 1350 900 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 4200 375 4200 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 525 600 1125 600 1125 2100 525 2100 525 600 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 1425 4425 1425 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3600 825 3750 825 3750 1125 3600 1125 3600 825 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2025 1425 2550 1425 -2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 4425 750 4875 750 4875 1125 4425 1125 4425 750 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3675 975 4425 975 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2 - 0 0 1.00 60.00 120.00 - 825 1050 1350 1050 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500 - 0.000 
1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 975 1125 975 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 1125 1125 1125 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7 - 0 0 1.00 60.00 120.00 - 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975 - 3600 975 - 0.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001 -4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001 -4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001 -4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001 -4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001 -4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001 -4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001 diff --git a/lnet/doc/portals3.bib b/lnet/doc/portals3.bib deleted file mode 100644 index 323b99fa58534eb04406f4e27feb36ab5fdfc8b1..0000000000000000000000000000000000000000 --- a/lnet/doc/portals3.bib +++ /dev/null @@ -1,124 +0,0 @@ -@Article{ Cplant, - title = { {M}assively {P}arallel {C}omputing with - {C}ommodity {C}omponents }, - author = { Ron Brightwell and David S. Greenberg and Arthur - B. 
Maccabe and Rolf Riesen }, - journal = { Parallel Computing }, - volume = { 26 }, - month = { February }, - pages = { 243-266 }, - year = { 2000 } -} - -@Manual{ Portals, - organization = { Sandia National Laboratories }, - title = { {P}uma {P}ortals }, - note = { http://www.cs.sandia.gov/puma/portals }, - year = { 1997 } -} - -@Techreport{ VIA, - title = { {V}irtual {I}nterface {A}rchitecture - {S}pecification {V}ersion 1.0 }, - author = { {Compaq, Microsoft, and Intel} }, - institution = { Compaq, Microsoft, and Intel }, - month = { December }, - year = { 1997 } -} - -@Techreport{ ST, - title = { {I}nformation {T}echnology - {S}cheduled - {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 }, - author = { {Task Group of Technical Committee T11} }, - institution = { Accredited Standards Committee NCITS }, - month = { July }, - year = { 1998 } -} - -@Manual{ TFLOPS, - organization = { Sandia National Laboratories }, - title = { ASCI Red }, - note = { http://www.sandia.gov/ASCI/TFLOP }, - year = { 1996 } -} - -@Techreport{ GM, - title = { The {GM} {M}essage {P}assing {S}ystem }, - author = { {Myricom, Inc.} }, - institution = { {Myricom, Inc.} }, - year = { 1997 }, -} - -@Article{ MPIstandard, - title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard }, - author = { {Message Passing Interface Forum} }, - journal = { The International Journal of Supercomputer Applications - and High Performance Computing }, - volume = { 8 }, - year = { 1994 } -} - -@Inproceedings{ PumaOS, - author = "Lance Shuler and Chu Jong and Rolf Riesen and - David van Dresser and Arthur B. Maccabe and - Lee Ann Fisk and T. Mack Stallcup", - booktitle = "Proceeding of the 1995 Intel Supercomputer - User's Group Conference", - title = "The {P}uma Operating System for Massively Parallel Computers", - organization = "Intel Supercomputer User's Group", - year = 1995 -} - -@InProceedings{ SUNMOS, -author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and - Stephen R. 
Wheat", -title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide", -booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994 - Annual North America Users' Conference.", -year = 1994, -pages = "245--251", -month = "June", -location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps" -} - -@InProceedings { PumaMPI, - title = { Design and Implementation of {MPI} on {P}uma Portals }, - author = { Ron Brightwell and Lance Shuler }, - booktitle = { Proceedings of the Second MPI Developer's Conference }, - pages = { 18-25 }, - month = { July }, - year = { 1996 } -} - -@Inproceedings{ FM2, - author = { Mario Lauria and Scott Pakin and Andrew Chien }, - title = { {E}fficient {L}ayering for {H}igh {S}peed - {C}ommunication: {F}ast {M}essages 2.x }, - Booktitle = { Proceedings of the IEEE International Symposium - on High Performance Distributed Computing }, - year = { 1998 } -} - -@Manual { CraySHMEM, - title = "SHMEM Technical Note for C, SG-2516 2.3", - organization = "Cray Research, Inc.", - month = "October", - year = 1994 -} - -@Manual { MPI2, - title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface", - organization = "Message Passing Interface Forum", - note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html", - month = "July", - year = 1997 -} - -@InProceedings { PMMPI, - title = { {The Design and Implementation of Zero Copy MPI Using - Commodity Hardware with a High Performance Network} }, - author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori - and Yutaka Ishikawa }, - booktitle = { Proceedings of the ICS }, - year = { 1998 } -} diff --git a/lnet/doc/portals3.lyx b/lnet/doc/portals3.lyx deleted file mode 100644 index 84292807e2da8bceb0139bdf52242e0b002b1505..0000000000000000000000000000000000000000 --- a/lnet/doc/portals3.lyx +++ /dev/null @@ -1,15944 +0,0 @@ -#LyX 1.2 created this file. 
For more info see http://www.lyx.org/ -\lyxformat 220 -\textclass report -\begin_preamble -\usepackage{fullpage} -\renewenvironment{comment}% -{\begin{quote}\textbf{Discussion}: \slshape}% -{\end{quote}} -\pagestyle{myheadings} -\end_preamble -\language american -\inputencoding auto -\fontscheme pslatex -\graphics default -\paperfontsize 10 -\spacing single -\papersize letterpaper -\paperpackage a4 -\use_geometry 0 -\use_amsmath 0 -\use_natbib 0 -\use_numerical_citations 0 -\paperorientation portrait -\secnumdepth 2 -\tocdepth 2 -\paragraph_separation indent -\defskip medskip -\quotes_language english -\quotes_times 2 -\papercolumns 1 -\papersides 2 -\paperpagestyle headings - -\layout Title - -The Portals 3.2 Message Passing Interface -\newline - Revision 1.1 -\layout Author - -Ron Brightwell -\begin_inset Foot -collapsed true - -\layout Standard - -R. - Brightwell and R. - Riesen are with the Scalable Computing Systems Department, Sandia National - Laboratories, P.O. - Box 5800, Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov. -\end_inset - -, Arthur B. - Maccabe -\begin_inset Foot -collapsed true - -\layout Standard - -A. - B. - Maccabe is with the Computer Science Department, University of New Mexico, - Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87131-1386, maccabe@cs.unm.edu. -\end_inset - -, Rolf Riesen and Trammell Hudson -\layout Abstract - -This report presents a specification for the Portals 3.2 message passing - interface. - Portals 3.2 is intended to allow scalable, high-performance network communicatio -n between nodes of a parallel computing system. - Specifically, it is designed to support a parallel computing platform composed - of clusters of commodity workstations connected by a commodity system area - network fabric. - In addition, Portals 3.2 is well suited to massively parallel processing - and embedded systems. 
- Portals 3.2 represents an adaption of the data movement layer developed - for massively parallel processing platforms, such as the 4500-node Intel - TeraFLOPS machine. - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -clearpage -\backslash -pagenumbering{roman} -\backslash -setcounter{page}{3} -\end_inset - - -\layout Standard - - -\begin_inset LatexCommand \tableofcontents{} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList figure - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList table - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Chapter* - -Summary of Changes for Revision 1.1 -\layout Enumerate - -Updated version number to 3.2 throughout the document -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sub:PtlGetId} - -\end_inset - -: added -\family typewriter -PTL_SEGV -\family default - to error list for -\shape italic -PtlGetId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: added -\family typewriter -PTL_ML_TOOLONG -\family default - to error list for -\shape italic -PtlMEAttach -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meunlink} - -\end_inset - -: removed text referring to a list of associated memory descriptors. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added text to describe unlinking a free-floating memory descriptor. 
-\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added entry for -\family typewriter -ptl_seq_t -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added definition of -\family typewriter -max_offset -\family default -. -\layout Enumerate - -added text to clarify -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default -. -\end_deeper -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: modified text for -\family typewriter -unlink_op -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: added text to clarify multiple calls to -\shape italic -PtlNIInit -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: added text to clarify -\family typewriter -unlink_nofit -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:receiving} - -\end_inset - -: removed text indicating that an MD will reject a message if the associated - EQ is full. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - error code and text to indicate that only MDs with no pending operations - can be unlinked. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - return code. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added user id field, MD handle field, and NI specific failure field to - the -\family typewriter -ptl_event_t -\family default - structure. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added -\family typewriter -ptl_ni_fail_t -\family default -. 
-\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added -\family typewriter -PTL_EVENT_UNLINK -\family default - event type. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: removed -\shape slanted -PtlTransId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -: listed allowable constants with relevant fields. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: added -\shape italic -PtlMEAttachAny -\shape default - function. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_PT_FULL -\family default - return code for -\shape italic -PtlMEAttachAny -\shape default -. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - -: updated to reflect new event types. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: added -\family typewriter -ptl_nid_t -\family default -, -\family typewriter -ptl_pid_t -\family default -, and -\family typewriter -ptl_uid_t -\family default -. -\layout Chapter* - -Summary of Changes for Version 3.1 -\layout Section* - -Thread Issues -\layout Standard - -The most significant change to the interface from version 3.0 to 3.1 involves - the clarification of how the interface interacts with multi-threaded applicatio -ns. - We adopted a generic thread model in which processes define an address - space and threads share the address space. 
- Consideration of the API in the light of threads led to several clarifications - throughout the document: -\layout Enumerate - -Glossary: -\begin_deeper -\layout Enumerate - -added a definition for -\emph on -thread -\emph default -, -\layout Enumerate - -reworded the definition for -\emph on -process -\emph default -. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - -: added section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:threads} - -\end_inset - - to describe the multi-threading model used by the Portals API. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlinit} - -\end_inset - -: -\emph on -PtlInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlfini} - -\end_inset - -: -\emph on -PtlFini -\emph default - should be called once as the process is terminating and not as each thread - terminates. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -: Portals does not define thread ids. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - -: network interfaces are associated with processes, not threads. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: -\emph on -PtlNIInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqget} - -\end_inset - -: -\emph on -PtlEQGet -\emph default - returns -\family typewriter -PTL_EQ_EMPTY -\family default - if a thread is blocked on -\emph on -PtlEQWait -\emph default -. 
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqwait} - -\end_inset - -: waiting threads are awakened in FIFO order. - -\layout Standard - -Two functions, -\emph on -PtlNIBarrier -\emph default - and -\emph on -PtlEQCount -\emph default - were removed from the API. - -\emph on -PtlNIBarrier -\emph default - was defined to block the calling process until all of the processes in - the application group had invoked -\emph on -PtlNIBarrier -\emph default -. - We now consider this functionality, along with the concept of groups (see - the discussion under -\begin_inset Quotes eld -\end_inset - -other changes -\begin_inset Quotes erd -\end_inset - -), to be part of the runtime system, not part of the Portals API. - -\emph on -PtlEQCount -\emph default - was defined to return the number of events in an event queue. - Because external operations may lead to new events being added and other - threads may remove events, the value returned by -\emph on -PtlEQCount -\emph default - would have to be a hint about the number of events in the event queue. -\layout Section* - -Handling small, unexpected messages -\layout Standard - -Another set of changes relates to handling small unexpected messages in - MPI. - In designing version 3.0, we assumed that each unexpected message would - be placed in a unique memory descriptor. - To avoid the need to process a long list of memory descriptors, we moved - the memory descriptors out of the match list and hung them off of a single - match list entry. - In this way, large unexpected messages would only encounter a single -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - match list entry before encountering the -\begin_inset Quotes eld -\end_inset - -long message -\begin_inset Quotes erd -\end_inset - - match list entry. - Experience with this strategy identified resource management problems with - this approach. 
- In particular, a long sequence of very short (or zero length) messages - could quickly exhaust the memory descriptors constructed for handling unexpecte -d messages. - Our new strategy involves the use of several very large memory descriptors - for small unexpected messages. - Consecutive unexpected messages will be written into the first of these - memory descriptors until the memory descriptor fills up. - When the first of the -\begin_inset Quotes eld -\end_inset - -small memory -\begin_inset Quotes erd -\end_inset - - descriptors fills up, it will be unlinked and subsequent short messages - will be written into the next -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor. - In this case, a -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor will be declared full when it does not have sufficient - space for the largest small unexpected message. -\layout Standard - -This led to two significant changes. - First, each match list entry now has a single memory descriptor rather - than a list of memory descriptors. - Second, in addition to exceeding the operation threshold, a memory descriptor - can be unlinked when the local offset exceeds a specified value. - These changes have led to several changes in this document: -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{subsec:paddress} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed references to the memory descriptor list, -\layout Enumerate - -changed the portals address translation description to indicate that unlinking - a memory descriptor implies unlinking the associated match list entry--match - list entries can no longer be unlinked independently from the memory descriptor. 
- -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed unlink from argument list, -\layout Enumerate - -removed description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -changed wording of the error condition when the Portal table index already - has an associated match list. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -: removed unlink from argument list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: added -\family typewriter -max_offset -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -removed reference to memory descriptor lists, -\layout Enumerate - -changed wording of the error condition when match list entry already has - an associated memory descriptor, -\layout Enumerate - -changed the description of the -\family typewriter -unlink -\family default - argument. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -: removed -\family typewriter -PtlMDInsert -\family default - operation. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: removed references to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: removed references to PtlMDInsert. 
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - -: revised the MPI example to reflect the changes to the interface. - -\layout Standard - -Several changes have been made to improve the general documentation of the - interface. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_EQ_NONE -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_ID_ANY -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: documented the return value -\family typewriter -PTL_INV_EQ -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - -: clarified the description of the -\emph on -PtlMDUpdate -\emph default - function. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:implvals} - -\end_inset - -: introduced a new section to document the implementation defined values. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: modified Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - - to indicate where each constant is introduced and where it is used. - -\layout Section* - -Other changes -\layout Subsection* - -Implementation defined limits (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -The earlier version provided implementation defined limits for the maximum - number of match entries, the maximum number of memory descriptors, etc. 
- Rather than spanning the entire implementation, these limits are now associated - with individual network interfaces. -\layout Subsection* - -Added User Ids (Section -\begin_inset LatexCommand \ref{sec:uid} - -\end_inset - -) -\layout Standard - -Group Ids had been used to simplify access control entries. - In particular, a process could allow access for all of the processes in - a group. - User Ids have been introduced to regain this functionality. - We use user ids to fill this role. -\layout Subsection* - -Removed Group Ids and Rank Ids (Section -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -) -\layout Standard - -The earlier version of Portals had two forms for addressing processes: <node - id, process id> and <group id, rank id>. - A process group was defined as the collection of processes created during - application launch. - Each process in the group was given a unique rank id in the range 0 to - -\begin_inset Formula $n-1$ -\end_inset - - where -\begin_inset Formula $n$ -\end_inset - - was the number of processes in the group. - We removed groups because they are better handled in the runtime system. -\layout Subsection* - -Match lists (Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -) -\layout Standard - -It is no longer illegal to have an existing match entry when calling PtlMEAttach. - A position argument was added to the list of arguments supplied to -\emph on -PtlMEAttach -\emph default - to specify whether the new match entry is prepended or appended to the - existing list. - If there is no existing match list, the position argument is ignored. -\layout Subsection* - -Unlinking Memory Descriptors (Section -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -) -\layout Standard - -Previously, a memory descriptor could be unlinked if the offset exceeded - a threshold upon the completion of an operation. 
- In this version, the unlinking is delayed until there is a matching operation - which requires more memory than is currently available in the descriptor. - In addition to changes in section, this led to a revision of Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - -. -\layout Subsection* - -Split Phase Operations and Events (Section -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - -) -\layout Standard - -Previously, there were five types of events: -\family typewriter -PTL_EVENT_PUT -\family default -, -\family typewriter -PTL_EVENT_GET -\family default -, -\family typewriter -PTL_EVENT_REPLY -\family default -, -\family typewriter -PTL_EVENT_SENT -\family default -, and -\family typewriter -PTL_EVENT_ACK. - -\family default -The first four of these reflected the completion of potentially long operations. - We have introduced new event types to reflect the fact that long operations - have a distinct starting point and a distinct completion point. - Moreover, the completion may be successful or unsuccessful. -\layout Standard - -In addition to providing a mechanism for reporting failure to higher levels - of software, this split provides an opportunity for improved ordering - semantics. - Previously, if one process initiated two operations (e.g., two put operations) - on a remote process, these operations were guaranteed to complete in the - same order that they were initiated. - Now, we only guarantee that the initiation events are delivered in the - same order. - In particular, the operations do not need to complete in the order that - they were initiated. -\layout Subsection* - -Well known process ids (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -To support the notion of -\begin_inset Quotes eld -\end_inset - -well known process ids, -\begin_inset Quotes erd -\end_inset - - we added a process id argument to the arguments for PtlNIInit. 
-\layout Chapter* - -Glossary -\layout Description - -API Application Programming Interface. - A definition of the functions and semantics provided by a library of functions. - -\layout Description - -Initiator A -\emph on -process -\emph default - that initiates a message operation. - -\layout Description - -Message An application-defined unit of data that is exchanged between -\emph on -processes -\emph default -. - -\layout Description - -Message\SpecialChar ~ -Operation Either a put operation, which writes data, or a get operation, - which reads data. - -\layout Description - -Network A network provides point-to-point communication between -\emph on -nodes -\emph default -. - Internally, a network may provide multiple routes between endpoints (to - improve fault tolerance or to improve performance characteristics); however, - multiple paths will not be exposed outside of the network. - -\layout Description - -Node A node is an endpoint in a -\emph on -network -\emph default -. - Nodes provide processing capabilities and memory. - A node may provide multiple processors (an SMP node) or it may act as a - -\emph on -gateway -\emph default - between networks. - -\layout Description - -Process A context of execution. - A process defines a virtual memory (VM) context. - This context is not shared with other processes. - Several threads may share the VM context defined by a process. - -\layout Description - -Target A -\emph on -process -\emph default - that is acted upon by a message operation. - -\layout Description - -Thread A context of execution that shares a VM context with other threads. 
- -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\layout Standard - -\backslash -setcounter{page}{1} -\backslash -pagenumbering{arabic} -\end_inset - - -\layout Chapter - -Introduction -\begin_inset LatexCommand \label{sec:intro} - -\end_inset - - -\layout Section - -Overview -\layout Standard - -This document describes an application programming interface for message - passing between nodes in a system area network. - The goal of this interface is to improve the scalability and performance - of network communication by defining the functions and semantics of message - passing required for scaling a parallel computing system to ten thousand - nodes. - This goal is achieved by providing an interface that will allow a quality - implementation to take advantage of the inherently scalable design of Portals. -\layout Standard - -This document is divided into several sections: -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:intro} - -\end_inset - ----Introduction This section describes the purpose and scope of the Portals - API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - ----An\SpecialChar ~ -Overview\SpecialChar ~ -of\SpecialChar ~ -the\SpecialChar ~ -Portals\SpecialChar ~ -3.1\SpecialChar ~ -API This section gives a brief overview of the - Portals API. - The goal is to introduce the key concepts and terminology used in the descripti -on of the API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:api} - -\end_inset - ----The\SpecialChar ~ -Portals\SpecialChar ~ -3.2\SpecialChar ~ -API This section describes the functions and semantics of - the Portals application programming interface. 
- -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - ----The\SpecialChar ~ -Semantics\SpecialChar ~ -of\SpecialChar ~ -Message\SpecialChar ~ -Transmission This section describes the semantics - of message transmission. - In particular, the information transmitted in each type of message and - the processing of incoming messages. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:examples} - -\end_inset - ----Examples This section presents several examples intended to illustrate - the use of the Portals API. - -\layout Section - -Purpose -\layout Standard - -Existing message passing technologies available for commodity cluster networking - hardware do not meet the scalability goals required by the Cplant\SpecialChar ~ - -\begin_inset LatexCommand \cite{Cplant} - -\end_inset - - project at Sandia National Laboratories. - The goal of the Cplant project is to construct a commodity cluster that - can scale to the order of ten thousand nodes. - This number greatly exceeds the capacity for which existing message passing - technologies have been designed and implemented. -\layout Standard - -In addition to the scalability requirements of the network, these technologies - must also be able to support a scalable implementation of the Message Passing - Interface (MPI)\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPIstandard} - -\end_inset - - standard, which has become the -\shape italic -de facto -\shape default - standard for parallel scientific computing. - While MPI does not impose any scalability limitations, existing message - passing technologies do not provide the functionality needed to allow implement -ations of MPI to meet the scalability requirements of Cplant.
-\layout Standard - -The following are properties of a network architecture that do not impose - any inherent scalability limitations: -\layout Itemize - -Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - - and TCP/IP sockets, have limitations on the number of peer connections - that can be established. - -\layout Itemize - -Network independence - Many communication systems depend on the host processor - to perform operations in order for messages in the network to be consumed. - Message consumption from the network should not be dependent on host processor - activity, such as the operating system scheduler or user-level thread scheduler. - -\layout Itemize - -User-level flow control - Many communication systems manage flow control - internally to avoid depleting resources, which can significantly impact - performance as the number of communicating processes increases. - -\layout Itemize - -OS Bypass - High performance network communication should not involve memory - copies into or out of a kernel-managed protocol stack. - -\layout Standard - -The following are properties of a network architecture that do not impose - scalability limitations for an implementation of MPI: -\layout Itemize - -Receiver-managed - Sender-managed message passing implementations require - a persistent block of memory to be available for every process, requiring - memory resources to increase with job size and requiring user-level flow - control mechanisms to manage these resources. - -\layout Itemize - -User-level Bypass - While OS Bypass is necessary for high-performance, it - alone is not sufficient to support the Progress Rule of MPI asynchronous - operations. - -\layout Itemize - -Unexpected messages - Few communication systems have support for receiving - messages for which there is no prior notification. 
- Support for these types of messages is necessary to avoid flow control - and protocol overhead. - -\layout Section - -Background -\layout Standard - -Portals was originally designed for and implemented on the nCube machine - as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~ - -\begin_inset LatexCommand \cite{SUNMOS} - -\end_inset - - and Puma\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaOS} - -\end_inset - - lightweight kernel development projects. - Portals went through two design phases, the latter of which is used on - the 4500-node Intel TeraFLOPS machine\SpecialChar ~ - -\begin_inset LatexCommand \cite{TFLOPS} - -\end_inset - -. - Portals have been very successful in meeting the needs of such a large - machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaMPI} - -\end_inset - -, but also for implementing the scalable run-time environment and parallel - I/O capabilities of the machine. -\layout Standard - -The second generation Portals implementation was designed to take full advantage - of the hardware architecture of large MPP machines. - However, efforts to implement this same design on commodity cluster technology - identified several limitations, due to the differences in network hardware - as well as to shortcomings in the design of Portals. -\layout Section - -Scalability -\layout Standard - -The primary goal in the design of Portals is scalability. - Portals are designed specifically for an implementation capable of supporting - a parallel job running on tens of thousands of nodes. - Performance is critical only in terms of scalability. - That is, the level of message passing performance is characterized by how - far it allows an application to scale and not by how it performs in micro-bench -marks (e.g., a two node bandwidth or latency test). -\layout Standard - -The Portals API is designed to allow for scalability, not to guarantee it. 
- Portals cannot overcome the shortcomings of a poorly designed application - program. - Applications that have inherent scalability limitations, either through - design or implementation, will not be transformed by Portals into scalable - applications. - Scalability must be addressed at all levels. - Portals do not inhibit scalability, but do not guarantee it either. -\layout Standard - -To support scalability, the Portals interface maintains a minimal amount - of state. - Portals provide reliable, ordered delivery of messages between pairs of - processes. - They are connectionless: a process is not required to explicitly establish - a point-to-point connection with another process in order to communicate. - Moreover, all buffers used in the transmission of messages are maintained - in user space. - The target process determines how to respond to incoming messages, and - messages for which there are no buffers are discarded. -\layout Section - -Communication Model -\layout Standard - -Portals combine the characteristics of both one-sided and two-sided communication. - They define a -\begin_inset Quotes eld -\end_inset - -matching put -\begin_inset Quotes erd -\end_inset - - operation and a -\begin_inset Quotes eld -\end_inset - -matching get -\begin_inset Quotes erd -\end_inset - - operation. - The destination of a put (or send) is not an explicit address; instead, - each message contains a set of match bits that allow the receiver to determine - where incoming messages should be placed. - This flexibility allows Portals to support both traditional one-sided operation -s and two-sided send/receive operations. -\layout Standard - -Portals allow the target to determine whether incoming messages are acceptable. - A target process can choose to accept message operations from any specific - process or can choose to ignore message operations from any specific process.
-\layout Section - -Zero Copy, OS Bypass and Application Bypass -\layout Standard - -In traditional system architectures, network packets arrive at the network - interface card (NIC), are passed through one or more protocol layers in - the operating system, and eventually copied into the address space of the - application. - As network bandwidth began to approach memory copy rates, reduction of - memory copies became a critical concern. - This concern led to the development of zero-copy message passing protocols - in which message copies are eliminated or pipelined to avoid the loss of - bandwidth. -\layout Standard - -A typical zero-copy protocol has the NIC generate an interrupt for the CPU - when a message arrives from the network. - The interrupt handler then controls the transfer of the incoming message - into the address space of the appropriate application. - The interrupt latency, the time from the initiation of an interrupt until - the interrupt handler is running, is fairly significant. - To avoid this cost, some modern NICs have processors that can be programmed - to implement part of a message passing protocol. - Given a properly designed protocol, it is possible to program the NIC to - control the transfer of incoming messages, without needing to interrupt - the CPU. - Because this strategy does not need to involve the OS on every message - transfer, it is frequently called -\begin_inset Quotes eld -\end_inset - -OS Bypass. -\begin_inset Quotes erd -\end_inset - - ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - -, FM\SpecialChar ~ - -\begin_inset LatexCommand \cite{FM2} - -\end_inset - -, GM\SpecialChar ~ - -\begin_inset LatexCommand \cite{GM} - -\end_inset - -, and Portals are examples of OS Bypass protocols.
-\layout Standard - -Many protocols that support OS Bypass still require that the application - actively participate in the protocol to ensure progress. - As an example, the long message protocol of PM requires that the application - receive and reply to a request to put or get a long message. - This complicates the runtime environment, requiring a thread to process - incoming requests, and significantly increases the latency required to - initiate a long message protocol. - The Portals message passing protocol does not require activity on the part - of the application to ensure progress. - We use the term -\begin_inset Quotes eld -\end_inset - -Application Bypass -\begin_inset Quotes erd -\end_inset - - to refer to this aspect of the Portals protocol. -\layout Section - -Faults -\layout Standard - -Given the number of components that we are dealing with and the fact that - we are interested in supporting applications that run for very long times, - failures are inevitable. - The Portals API recognizes that the underlying transport may not be able - to successfully complete an operation once it has been initiated. - This is reflected in the fact that the Portals API reports three types - of events: events indicating the initiation of an operation, events indicating - the successful completion of an operation, and events indicating the unsuccessf -ul completion of an operation. - Every initiation event is eventually followed by a successful completion - event or an unsuccessful completion event. -\layout Standard - -Between the time an operation is started and the time that the operation - completes (successfully or unsuccessfully), any memory associated with - the operation should be considered volatile. - That is, the memory may be changed in unpredictable ways while the operation - is progressing. - Once the operation completes, the memory associated with the operation - will not be subject to further modification (from this operation). 
- Notice that unsuccessful operations may alter memory in an essentially - unpredictable fashion. -\layout Chapter - -An Overview of the Portals API -\begin_inset LatexCommand \label{sec:apiover} - -\end_inset - - -\layout Standard - -In this section, we give a conceptual overview of the Portals API. - The goal is to provide a context for understanding the detailed description - of the API presented in the next section. -\layout Section - -Data Movement -\begin_inset LatexCommand \label{sec:dmsemantics} - -\end_inset - - -\layout Standard - -A Portal represents an opening in the address space of a process. - Other processes can use a Portal to read (get) or write (put) the memory - associated with the portal. - Every data movement operation involves two processes, the -\series bold -initiator -\series default - and the -\series bold -target -\series default -. - The initiator is the process that initiates the data movement operation. - The target is the process that responds to the operation by either accepting - the data for a put operation, or replying with the data for a get operation. -\layout Standard - -In this discussion, activities attributed to a process may refer to activities - that are actually performed by the process or -\emph on -on behalf of the process -\emph default -. - The inclusiveness of our terminology is important in the context of -\emph on -application bypass -\emph default -. - In particular, when we note that the target sends a reply in the case of - a get operation, it is possible that the reply will be generated by another - component in the system, bypassing the application. -\layout Standard - -Figures\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:put} - -\end_inset - - and -\begin_inset LatexCommand \ref{fig:get} - -\end_inset - - present graphical interpretations of the Portal data movement operations: - put and get. - In the case of a put operation, the initiator sends a put request message - containing the data to the target.
- The target translates the Portal addressing information in the request - using its local Portal structures. - When the request has been processed, the target optionally sends an acknowledge -ment message. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename put.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Put (Send) -\begin_inset LatexCommand \label{fig:put} - -\end_inset - - -\end_inset - - -\layout Standard - -In the case of a get operation, the initiator sends a get request to the - target. - As with the put operation, the target translates the Portal addressing - information in the request using its local Portal structures. - Once it has translated the Portal addressing information, the target sends - a reply that includes the requested data. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename get.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Get -\begin_inset LatexCommand \label{fig:get} - -\end_inset - - -\end_inset - - -\layout Standard - -We should note that Portal address translations are only performed on nodes - that respond to operations initiated by other nodes. - Acknowledgements and replies to get operations bypass the portals address - translation structures. 
-\layout Section - -Portal Addressing -\begin_inset LatexCommand \label{subsec:paddress} - -\end_inset - - -\layout Standard - -One-sided data movement models (e.g., shmem\SpecialChar ~ - -\begin_inset LatexCommand \cite{CraySHMEM} - -\end_inset - -, ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, MPI-2\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPI2} - -\end_inset - -) typically use a triple to address memory on a remote node. - This triple consists of a process id, memory buffer id, and offset. - The process id identifies the target process, the memory buffer id specifies - the region of memory to be used for the operation, and the offset specifies - an offset within the memory buffer. -\layout Standard - -In addition to the standard address components (process id, memory buffer - id, and offset), a Portal address includes a set of match bits. - This addressing model is appropriate for supporting one-sided operations - as well as traditional two-sided message passing operations. - Specifically, the Portals API provides the flexibility needed for an efficient - implementation of MPI-1, which defines two-sided operations with one-sided - completion semantics. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:portals} - -\end_inset - - presents a graphical representation of the structures used by a target - in the interpretation of a Portal address. - The process id is used to route the message to the appropriate node and - is not reflected in this diagram. - The memory buffer id, called the -\series bold -portal id -\series default -, is used as an index into the Portal table. - Each element of the Portal table identifies a match list. 
- Each element of the match list specifies two bit patterns: a set of -\begin_inset Quotes eld -\end_inset - -don't care -\begin_inset Quotes erd -\end_inset - - bits, and a set of -\begin_inset Quotes eld -\end_inset - -must match -\begin_inset Quotes erd -\end_inset - - bits. - In addition to the two sets of match bits, each match list element has - at most one memory descriptor. - Each memory descriptor identifies a memory region and an optional event - queue. - The memory region specifies the memory to be used in the operation and - the event queue is used to record information about these operations. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename portals.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 305pt - lyxheight 106pt -\end_inset - - -\layout Caption - -Portal Addressing Structures -\begin_inset LatexCommand \label{fig:portals} - -\end_inset - - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - - illustrates the steps involved in translating a Portal address, starting - from the first element in a match list. - If the match criteria specified in the match list entry are met and the - memory descriptor list accepts the operation -\begin_inset Foot -collapsed true - -\layout Standard - -Memory descriptors can reject operations because a threshold has been exceeded - or because the memory region does not have sufficient space, see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - -, the operation (put or get) is performed using the memory region specified - in the memory descriptor. 
- If the memory descriptor specifies that it is to be unlinked when a threshold - has been exceeded, the match list entry is removed from the match list - and the resources associated with the memory descriptor and match list - entry are reclaimed. - Finally, if there is an event queue specified in the memory descriptor, - the operation is logged in the event queue. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename flow_new.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 447pt - lyxheight 282pt -\end_inset - - -\layout Caption - -Portals Address Translation -\begin_inset LatexCommand \label{fig:flow} - -\end_inset - - -\end_inset - - -\layout Standard - -If the match criteria specified in the match list entry are not met, or - there is no memory descriptor associated with the match list entry, or - the memory descriptor associated with the match list entry rejects the - operation, the address translation continues with the next match list entry. - If the end of the match list has been reached, the address translation - is aborted and the incoming request is discarded. -\layout Section - -Access Control -\layout Standard - -A process can control access to its portals using an access control list. - Each entry in the access control list specifies a process id and a Portal - table index. - The access control list is actually an array of entries. - Each incoming request includes an index into the access control list (i.e., - a -\begin_inset Quotes eld -\end_inset - -cookie -\begin_inset Quotes erd -\end_inset - - or hint). - If the id of the process issuing the request doesn't match the id specified - in the access control list entry or the Portal table index specified in - the request doesn't match the Portal table index specified in the access - control list entry, the request is rejected.
- Process identifiers and Portal table indexes may include wild card values - to increase the flexibility of this mechanism. - -\layout Standard - -Two aspects of this design merit further discussion. - First, the model assumes that the information in a message header, the - sender's id in particular, is trustworthy. - In most contexts, we assume that the entity that constructs the header - is trustworthy; however, using cryptographic techniques, we could easily - devise a protocol that would ensure the authenticity of the sender. -\layout Standard - -Second, because the access check is performed by the receiver, it is possible - that a malicious process will generate thousands of messages that will - be denied by the receiver. - This could saturate the network and/or the receiver, resulting in a -\emph on -denial of service -\emph default - attack. - Moving the check to the sender using capabilities, would remove the potential - for this form of attack. - However, the solution introduces the complexities of capability management - (exchange of capabilities, revocation, protections, etc). -\layout Section - -Multi-threaded Applications -\begin_inset LatexCommand \label{sec:threads} - -\end_inset - - -\layout Standard - -The Portals API supports a generic view of multi-threaded applications. - From the perspective of the Portals API, an application program is defined - by a set of processes. - Each process defines a unique address space. - The Portals API defines access to this address space from other processes - (using portals addressing and the data movement operations). - A process may have one or more -\emph on -threads -\emph default - executing in its address space. - -\layout Standard - -With the exception of -\emph on -PtlEQWait -\emph default - every function in the Portals API is non-blocking and atomic with respect - to both other threads and external operations that result from data movement - operations. 
- While individual operations are atomic, sequences of these operations may - be interleaved between different threads and with external operations. - The Portals API does not provide any mechanisms to control this interleaving. - It is expected that these mechanisms will be provided by the API used to - create threads. -\layout Chapter - -The Portals API -\begin_inset LatexCommand \label{sec:api} - -\end_inset - - -\layout Section - -Naming Conventions -\begin_inset LatexCommand \label{sec:conv} - -\end_inset - - -\layout Standard - -The Portals API defines two types of entities: functions and types. - Functions always start with -\emph on -Ptl -\emph default - and use mixed upper and lower case. - When used in the body of this report, function names appear in italic face, - e.g., -\emph on -PtlInit -\emph default -. - The functions associated with an object type will have names that start - with -\emph on -Ptl -\emph default -, followed by the two letter object type code shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - As an example, the function -\emph on -PtlEQAlloc -\emph default - allocates resources for an event queue.
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Object Type Codes -\begin_inset LatexCommand \label{tab:objcodes} - -\end_inset - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\newline - -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\emph on -xx -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Section -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -EQ -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Event Queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - MD -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Memory Descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> 
-</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - ME -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Match list Entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - NI -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Network Interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Type names use lower case with underscores to separate words. - Each type name starts with -\family typewriter -ptl -\family default -_ and ends with -\family typewriter -_t -\family default -. - When used in the body of this report, type names appear in a fixed font, - e.g., -\family typewriter -ptl_match_bits_t -\family default -. -\layout Standard - -Names for constants use upper case with underscores to separate words. - Each constant name starts with -\family typewriter -PTL_ -\family default -. - When used in the body of this report, type names appear in a fixed font, - e.g., -\family typewriter -PTL_OK -\family default -. -\layout Section - -Base Types -\layout Standard - -The Portals API defines a variety of base types. - These types represent a simple renaming of the base types provided by the - C programming language. 
- In most cases these new type names have been introduced to improve type - safety and to avoid issues arising from differences in representation sizes - (e.g., 16-bit or 32-bit integers). -\layout Subsection - -Sizes -\begin_inset LatexCommand \label{sec:size-t} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_size_t -\family default - is an unsigned 64-bit integral type used for representing sizes. -\layout Subsection - -Handles -\begin_inset LatexCommand \label{sec:handle-type} - -\end_inset - - -\layout Standard - -Objects maintained by the API are accessed through handles. - Handle types have names of the form -\family typewriter -ptl_handle_ -\emph on -xx -\emph default -_t -\family default -, where -\emph on -xx -\emph default - is one of the two letter object type codes shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - For example, the type -\family typewriter -ptl_handle_ni_t -\family default - is used for network interface handles. -\layout Standard - -Each type of object is given a unique handle type to enhance type checking. - The type, -\family typewriter -ptl_handle_any_t -\family default -, can be used when a generic handle is needed. - Every handle value can be converted into a value of type -\family typewriter -ptl_handle_any_t -\family default - without loss of information. -\layout Standard - -Handles are not simple values. - Every portals object is associated with a specific network interface and - an identifier for this interface (along with an object identifier) is part - of the handle for the object. -\layout Standard - -The special value -\family typewriter -PTL_EQ_NONE -\family default -, of type -\family typewriter -ptl_handle_eq_t -\family default -, is used to indicate the absence of an event queue. 
- See sections -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - for uses of this value. -\layout Subsection - -Indexes -\begin_inset LatexCommand \label{sec:index-type} - -\end_inset - - -\layout Standard - -The types -\family typewriter -ptl_pt_index_t -\family default - and -\family typewriter -ptl_ac_index_t -\family default - are integral types used for representing Portal table indexes and access - control table indexes, respectively. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - - for limits on values of these types. -\layout Subsection - -Match Bits -\begin_inset LatexCommand \label{sec:mb-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_match_bits_t -\family default - is capable of holding unsigned 64-bit integer values. -\layout Subsection - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_interface_t -\family default - is an integral type used for identifying different network interfaces. - Users will need to consult the local documentation to determine appropriate - values for the interfaces available. - The special value -\family typewriter -PTL_IFACE_DEFAULT -\family default - identifies the default interface. -\layout Subsection - -Identifiers -\begin_inset LatexCommand \label{sec:id-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_nid_t -\family default - is an integral type used for representing node ids -\family typewriter -, ptl_pid_t -\family default - is an integral type for representing process ids, and -\family typewriter -ptl_uid_t -\family default - is an integral type for representing user ids.
-\layout Standard - -The special values -\family typewriter -PTL_PID_ANY -\family default - matches any process identifier, PTL_NID_ANY matches any node identifier, - and -\family typewriter -PTL_UID_ANY -\family default - matches any user identifier. - See sections -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - for uses of these values. -\layout Subsection - -Status Registers -\begin_inset LatexCommand \label{sec:stat-type} - -\end_inset - - -\layout Standard - -Each network interface maintains an array of status registers that can be - accessed using the -\family typewriter -PtlNIStatus -\family default - function (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - The type -\family typewriter -ptl_sr_index_t -\family default - defines the types of indexes that can be used to access the status registers. - The only index defined for all implementations is -\family typewriter -PTL_SR_DROP_COUNT -\family default - which identifies the status register that counts the dropped requests for - the interface. - Other indexes (and registers) may be defined by the implementation. -\layout Standard - -The type -\family typewriter -ptl_sr_value_t -\family default - defines the types of values held in status registers. - This is a signed integer type. - The size is implementation dependent, but must be at least 32 bits. -\layout Section - -Initialization and Cleanup -\begin_inset LatexCommand \label{sec:init} - -\end_inset - - -\layout Standard - -The Portals API includes a function, -\emph on -PtlInit -\emph default -, to initialize the library and a function, -\emph on -PtlFini -\emph default -, to cleanup after the application is done using the library. 
-\layout Subsection - -PtlInit -\begin_inset LatexCommand \label{sec:ptlinit} - -\end_inset - - -\layout LyX-Code - -int PtlInit( int *max_interfaces ); -\layout Standard -\noindent -The -\emph on -PtlInit -\emph default - function initializes the Portals library. - PtlInit must be called at least once by a process before any thread makes - a Portals function call, but may be safely called more than once. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_FAIL Indicates an error during initialization. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -max_interfaces -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -max_interfaces -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the maximum number of interfaces - that can be initialized. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlFini -\begin_inset LatexCommand \label{sec:ptlfini} - -\end_inset - - -\layout LyX-Code - -void PtlFini( void ); -\layout Standard -\noindent -The -\emph on -PtlFini -\emph default - function cleans up after the Portals library is no longer needed by a process. 
- After this function is called, calls to any of the functions defined by - the Portal API or use of the structures set up by the Portals API will - result in undefined behavior. - This function should be called once and only once during termination by - a process. - Typically, this function will be called in the exit sequence of a process. - Individual threads should not call PtlFini when they terminate. -\layout Section - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni} - -\end_inset - - -\layout Standard - -The Portals API supports the use of multiple network interfaces. - However, each interface is treated as an independent entity. - Combining interfaces (e.g., -\begin_inset Quotes eld -\end_inset - -bonding -\begin_inset Quotes erd -\end_inset - - to create a higher bandwidth connection) must be implemented by the application - or embedded in the underlying network. - Interfaces are treated as independent entities to make it easier to cache - information on individual network interface cards. -\layout Standard - -Once initialized, each interface provides a Portal table, an access control - table, and a collection of status registers. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for a discussion of updating Portal table entries using the -\emph on -PtlMEAttach -\emph default - function. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - for a discussion of the initialization and updating of entries in the access - control table. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - for a discussion of the -\emph on -PtlNIStatus -\emph default - function which can be used to determine the value of a status register. -\layout Standard - -Every other type of Portal object (e.g., memory descriptor, event queue, or - match list entry) is associated with a specific network interface. 
- The association to a network interface is established when the object is - created and is encoded in the handle for the object. -\layout Standard - -Each network interface is initialized and shutdown independently. - The initialization routine, -\emph on -PtlNIInit -\emph default -, returns a handle for an interface object which is used in all subsequent - Portal operations. - The -\emph on -PtlNIFini -\emph default - function is used to shutdown an interface and release any resources that - are associated with the interface. - Network interface handles are associated with processes, not threads. - All threads in a process share all of the network interface handles. -\layout Standard - -The Portals API also defines the -\emph on -PtlNIStatus -\emph default - function to query the status registers for a network interface, the -\emph on -PtlNIDist -\emph default - function to determine the -\begin_inset Quotes eld -\end_inset - -distance -\begin_inset Quotes erd -\end_inset - - to another process, and the -\emph on -PtlNIHandle -\emph default - function to determine the network interface that an object is associated - with. -\layout Subsection - -PtlNIInit -\begin_inset LatexCommand \label{sec:niinit} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - int max_match_entries; -\newline - int max_mem_descriptors; -\newline - int max_event_queues; -\newline - ptl_ac_index_t max_atable_index; -\newline - ptl_pt_index_t max_ptable_index; -\newline -} ptl_ni_limits_t; -\newline - -\newline -int PtlNIInit( ptl_interface_t interface, -\newline - ptl_pid_t pid, -\newline - ptl_ni_limits_t* desired, -\newline - ptl_ni_limits_t* actual, -\newline - ptl_handle_ni_t* handle ); -\layout Standard - -Values of type -\family typewriter -ptl_ni_limits_t -\family default - include the following members: -\layout Description - -max_match_entries Maximum number of match entries that can be allocated - at any one time. 
-\layout Description - -max_mem_descriptors Maximum number of memory descriptors that can be allocated - at any one time. -\layout Description - -max_event_queues Maximum number of event queues that can be allocated at - any one time. -\layout Description - -max_atable_index Largest access control table index for this interface, - valid indexes range from zero to -\family typewriter -max_atable_index -\family default -, inclusive. -\layout Description - -max_ptable_index Largest Portal table index for this interface, valid indexes - range from zero to -\family typewriter -max_ptable_index -\family default -, inclusive. -\layout Standard -\noindent -The -\emph on -PtlNIInit -\emph default - function is used to initialize the Portals API for a network interface. - This function must be called at least once by each process before any other - operations that apply to the interface by any process or thread. - For subsequent calls to -\shape italic -PtlNIInit -\shape default - from within the same process (either by different threads or the same thread), - the desired limits will be ignored and the call will return the existing - NI handle. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INIT_DUP Indicates a duplicate initialization of -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INIT_INV Indicates that -\family typewriter -interface -\family default - is not a valid network interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to initialize the - interface. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -pid -\family default - is not a valid process id. 
-\layout Description - -PTL_SEGV Indicates that -\family typewriter -actual -\family default - or -\family typewriter -handle -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the network interface to be initialized. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - for a discussion of values used to identify network interfaces.) -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -pid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the desired process id (for well known process ids). - The value -\family typewriter -PTL_PID_ANY -\family default - may be used to have the process id assigned by the underlying library. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -desired -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If non-NULL, points to a structure that holds the desired limits. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -actual -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, the location pointed to by actual will hold the actual - limits. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the interface. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -The use of desired is implementation dependent. - In particular, an implementation may choose to ignore this argument. -\layout Subsection - -PtlNIFini -\begin_inset LatexCommand \label{sec:nifini} - -\end_inset - - -\layout LyX-Code - -int PtlNIFini( ptl_handle_ni_t interface ); -\layout Standard -\noindent -The -\emph on -PtlNIFini -\emph default - function is used to release the resources allocated for a network interface. - Once the -\emph on -PtlNIFini -\emph default - operation has been started, the results of pending API operations (e.g., - operations initiated by another thread) for this interface are undefined. - Similarly, the effects of incoming operations (puts and gets) or return - values (acknowledgements and replies) for this interface are undefined. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the interface to shutdown. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlNIStatus -\begin_inset LatexCommand \label{sec:nistatus} - -\end_inset - - -\layout LyX-Code - -int PtlNIStatus( ptl_handle_ni_t interface, -\newline - ptl_sr_index_t status_register, -\newline - ptl_sr_value_t* status ); -\layout Standard -\noindent -The -\emph on -PtlNIStatus -\emph default - function returns the value of a status register for the specified interface. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - for more information on status register indexes and status register values.) -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_INV_SR_INDX Indicates that -\family typewriter -status_register -\family default - is not a valid status register. 
- -\layout Description - -PTL_SEGV Indicates that -\family typewriter -status -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -status_register -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -An index for the status register to read. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -status -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the current value of the status - register. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -The only status register that must be defined is a drop count register ( -\family typewriter -PTL_SR_DROP_COUNT -\family default -). - Implementations may define additional status registers. - Identifiers for the indexes associated with these registers should start - with the prefix -\family typewriter -PTL_SR_ -\family default -. -\layout Subsection - -PtlNIDist -\layout LyX-Code - -int PtlNIDist( ptl_handle_ni_t interface, -\newline - ptl_process_id_t process, -\newline - unsigned long* distance ); -\layout Standard -\noindent -The -\emph on -PtlNIDist -\emph default - function returns the distance to another process using the specified interface. - Distances are only defined relative to an interface. - Distance comparisons between different interfaces on the same process may - be meaningless. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. 
- -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -process -\family default - is not a valid process identifier. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -distance -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -process -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -An identifier for the process whose distance is being requested. 
- -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -distance -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the distance to the remote - process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -This function should return a static measure of distance. - Examples include minimum latency, the inverse of available bandwidth, or - the number of switches between the two endpoints. -\layout Subsection - -PtlNIHandle -\layout LyX-Code - -int PtlNIHandle( ptl_handle_any_t handle, -\newline - ptl_handle_ni_t* interface ); -\layout Standard -\noindent -The -\emph on -PtlNIHandle -\emph default - function returns a handle for the network interface with which the object - identified by -\family typewriter -handle -\family default - is associated. - If the object identified by -\family typewriter -handle -\family default - is a network interface, this function returns the same value it is passed. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_HANDLE Indicates that -\family typewriter -handle -\family default - is not a valid handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -interface -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the object. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the network interface - associated with -\family typewriter -handle -\family default -. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Every handle should encode the network interface and the object id relative - to this handle. - Both are presumably encoded using integer values. -\layout Section - -User Identification -\begin_inset LatexCommand \label{sec:uid} - -\end_inset - - -\layout Standard - -Every process runs on behalf of a user. 
- -\layout Subsection - -PtlGetUid -\layout LyX-Code - -int PtlGetUid( ptl_handle_ni_t ni_handle, -\newline - ptl_uid_t* uid ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -uid -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ni_handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A network interface handle. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -uid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the user id for the calling - process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Note that user identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, a process may have multiple - user identifiers. -\layout Section - -Process Identification -\begin_inset LatexCommand \label{sec:pid} - -\end_inset - - -\layout Standard - -Processes that use the Portals API can be identified using a node id and - process id. - Every node accessible through a network interface has a unique node identifier - and every process running on a node has a unique process identifier. - As such, any process in the computing system can be identified by its node - id and process id. - -\layout Standard - -The Portals API defines a type, -\family typewriter -ptl_process_id_t -\family default - for representing process ids and a function, -\emph on -PtlGetId -\emph default -, which can be used to obtain the id of the current process. -\layout Comment - -The Portals API does not include thread identifiers. - Messages are delivered to processes (address spaces) not threads (contexts - of execution). 
-\layout Subsection - -The Process Id Type -\begin_inset LatexCommand \label{sec:pid-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_nid_t nid; /* node id */ -\newline - ptl_pid_t pid; /* process id */ -\newline -} ptl_process_id_t; -\layout Standard -\noindent -The -\family typewriter -ptl_process_id_t -\family default - type uses two identifiers to represent a process id: a node id and a process - id. - -\layout Subsection - -PtlGetId -\begin_inset LatexCommand \label{sub:PtlGetId} - -\end_inset - - -\layout LyX-Code - -int PtlGetId( ptl_handle_ni_t ni_handle, -\newline - ptl_process_id_t* id ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -id -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A network interface handle. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -id -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the id for the calling process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Note that process identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, it may have multiple - node identifiers. -\layout Section - -Match List Entries and Match Lists -\begin_inset LatexCommand \label{sec:me} - -\end_inset - - -\layout Standard - -A match list is a chain of match list entries. - Each match list entry includes a memory descriptor and a set of match criteria. - The match criteria can be used to reject incoming requests based on process - id or the match bits provided in the request. - A match list is created using the -\emph on -PtlMEAttach -\emph default - or -\shape italic -PtlMEAttachAny -\shape default - functions, each of which creates a match list consisting of a single match list - entry, attaches the match list to the specified Portal index, and returns - a handle for the match list entry. - Match entries can be dynamically inserted and removed from a match list - using the -\emph on -PtlMEInsert -\emph default - and -\emph on -PtlMEUnlink -\emph default - functions. 
-\layout Subsection - -PtlMEAttach -\begin_inset LatexCommand \label{sec:meattach} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t; -\newline - -\layout LyX-Code - -typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t; -\newline - -\layout LyX-Code - -int PtlMEAttach( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ins_pos_t -\family default - are used to control where a new item is inserted. - The value -\family typewriter -PTL_INS_BEFORE -\family default - is used to insert the new item before the current item or before the head - of the list. - The value -\family typewriter -PTL_INS_AFTER -\family default - is used to insert the new item after the current item or after the last - item in the list. - -\layout Standard - -The -\emph on -PtlMEAttach -\emph default - function creates a match list consisting of a single entry and attaches - this list to the Portal table for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PTINDEX Indicates that -\family typewriter -index -\family default - is not a valid Portal table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. 
- -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="7" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.75in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The Portal table index where the match list should be attached. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Specifies the match criteria for the process id of the requestor. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -match_bits, ignorebits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Specify the match criteria to apply to the match bits in the incoming request. - The -\family typewriter -ignorebits -\family default - are used to mask out insignificant bits in the incoming match bits. - The resulting bits are then compared to the match list entry's match - bits to determine if the incoming request meets the match criteria. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -unlink -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates the match list entry should be unlinked when the last memory descripto -r associated with this match list entry is unlinked. - (Note, the check for unlinking a match entry only occurs when a memory - descriptor is unlinked.) -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be prepended or appended to - the existing match list. - If there is no existing list, this argument is ignored and the new match - entry becomes the only entry in the list. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEAttachAny -\begin_inset LatexCommand \label{sec:attachany} - -\end_inset - - -\layout LyX-Code - -int PtlMEAttachAny( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t *index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEAttachAny -\emph default - function creates a match list consisting of a single entry and attaches - this list to an unused Portal table entry for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. 
- -\layout Description - -PTL_PT_FULL Indicates that there are no free entries in the Portal table. -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.75in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the Portal index where the - match list has been attached. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid, match_bits, ignorebits, unlink -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\shape italic -PtlMEAttach -\shape default -. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEInsert -\begin_inset LatexCommand \label{sec:meinsert} - -\end_inset - - -\layout LyX-Code - -int PtlMEInsert( ptl_handle_me_t current, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEInsert -\emph default - function creates a new match list entry and inserts this entry into the - match list containing -\family typewriter -current -\family default -. 
-\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -current -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match entry. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -current -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for a match entry. - The new match entry will be inserted immediately before or immediately - after this match entry. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\family default -, -\family typewriter -match_bits -\family default -, -\family typewriter -ignorebits -\family default -, -\family typewriter -unlink -\family default - -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be inserted before or after - the -\family typewriter -current -\family default - entry. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default -. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEUnlink -\begin_inset LatexCommand \label{sec:meunlink} - -\end_inset - - -\layout LyX-Code - -int PtlMEUnlink( ptl_handle_me_t entry ); -\layout Standard -\noindent -The -\emph on -PtlMEUnlink -\emph default - function can be used to unlink a match entry from a match list. - This operation also releases any resources associated with the match entry - (including the associated memory descriptor). - It is an error to use the match entry handle after calling -\emph on -PtlMEUnlink -\emph default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -entry -\family default - is not a valid match entry handle. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -entry -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the match entry to be unlinked. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Memory Descriptors -\begin_inset LatexCommand \label{sec:md} - -\end_inset - - -\layout Standard - -A memory descriptor contains information about a region of an application - process' memory and an event queue where information about the operations - performed on the memory descriptor are recorded. - The Portals API provides two operations to create memory descriptors: -\emph on -PtlMDAttach -\emph default -, and -\emph on -PtlMDBind -\emph default -; an operation to update a memory descriptor, -\emph on -PtlMDUpdate -\emph default -; and an operation to unlink and release the resources associated with a - memory descriptor, -\emph on -PtlMDUnlink -\emph default -. 
-\layout Subsection - -The Memory Descriptor Type -\begin_inset LatexCommand \label{sec:md-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - void* start; -\newline - ptl_size_t length; -\newline - int threshold; -\newline - unsigned int max_offset; -\newline - unsigned int options; -\newline - void* user_ptr; -\newline - ptl_handle_eq_t eventq; -\newline -} ptl_md_t; -\layout Standard -\noindent -The -\family typewriter -ptl_md_t -\family default - type defines the application view of a memory descriptor. - Values of this type are used to initialize and update the memory descriptors. -\layout Subsubsection - -Members -\layout Description - -start,\SpecialChar ~ -length Specify the memory region associated with the memory descriptor. - The -\family typewriter -start -\family default - member specifies the starting address for the memory region and the -\family typewriter -length -\family default - member specifies the length of the region. - The -\family typewriter -start member -\family default - can be NULL provided that the -\family typewriter -length -\family default - member is zero. - (Zero length buffers are useful to record events.) There are no alignment - restrictions on the starting address or the length of the region; although, - unaligned messages may be slower (i.e., lower bandwidth and/or longer latency) - on some implementations. - -\layout Description - -threshold Specifies the maximum number of operations that can be performed - on the memory descriptor. - An operation is any action that could possibly generate an event (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - for the different types of events). - In the usual case, the threshold value is decremented for each operation - on the memory descriptor. - When the threshold value is zero, the memory descriptor is -\emph on -inactive -\emph default -, and does not respond to operations. 
- A memory descriptor can have an initial threshold value of zero to allow - for manipulation of an inactive memory descriptor by the local process. - A threshold value of -\family typewriter -PTL_MD_THRESH_INF -\family default - indicates that there is no bound on the number of operations that may be - applied to a memory descriptor. - Note that local operations (e.g., -\emph on -PtlMDUpdate -\emph default -) are not applied to the threshold count. - -\layout Description - -max_offset Specifies the maximum local offset of a memory descriptor. - When the local offset of a memory descriptor exceeds this maximum, the - memory descriptor becomes -\shape italic -inactive -\shape default - and does not respond to further operations. -\layout Description - -options Specifies the behavior of the memory descriptor. - There are five options that can be selected: enable put operations (yes - or no), enable get operations (yes or no), offset management (local or - remote), message truncation (yes or no), and acknowledgement (yes or no). - Values for this argument can be constructed using a bitwise or of the following - values: -\begin_deeper -\begin_deeper -\layout Description - -PTL_MD_OP_PUT Specifies that the memory descriptor will respond to -\emph on -put -\emph default - operations. - By default, memory descriptors reject -\emph on -put -\emph default - operations. - -\layout Description - -PTL_MD_OP_GET Specifies that the memory descriptor will respond to -\emph on -get -\emph default - operations. - By default, memory descriptors reject -\emph on -get -\emph default - operations. - -\layout Description - -PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory - region is provided by the incoming request. - By default, the offset is maintained locally. - When the offset is maintained locally, the offset is incremented by the - length of the request so that the next operation (put and/or get) will - access the next part of the memory region. 
-\layout Description - -PTL_MD_TRUNCATE Specifies that the length provided in the incoming request - can be reduced to match the memory available in the region. - (The memory available in a memory region is determined by subtracting the - offset from the length of the memory region.) By default, if the length - in the incoming operation is greater than the amount of memory available, - the operation is rejected. - -\layout Description - -PTL_MD_ACK_DISABLE Specifies that an acknowledgement should -\emph on -not -\emph default - be sent for incoming -\emph on -put -\emph default - operations, even if requested. - By default, acknowledgements are sent for -\emph on -put -\emph default - operations that request an acknowledgement. - Acknowledgements are never sent for -\emph on -get -\emph default - operations. - The value sent in the reply serves as an implicit acknowledgement. - -\end_deeper -\layout Standard - - -\series bold -Note -\series default -: It is not considered an error to have a memory descriptor that does not - respond to either -\emph on -put -\emph default - or -\emph on -get -\emph default - operations: Every memory descriptor responds to -\emph on -reply -\emph default - operations. - Nor is it considered an error to have a memory descriptor that responds - to both -\emph on -put -\emph default - and -\emph on -get -\emph default - operations. - -\end_deeper -\layout Description - -user_ptr A user-specified value that is associated with the memory descriptor. - The value does not need to be a pointer, but must fit in the space used - by a pointer. - This value (along with other values) is recorded in events associated with - operations on this memory descriptor. 
-\begin_inset Foot -collapsed true - -\layout Standard - -Tying the memory descriptor to a user-defined value can be useful when multiple - memory descriptors share the same event queue or when the memory descriptor - needs to be associated with a data structure maintained by the application. - For example, an MPI implementation can set the -\family typewriter -user_ptr -\family default - argument to the value of an MPI Request. - This direct association allows for processing of memory descriptors by - the MPI implementation without a table lookup or a search for the appropriate - MPI Request. -\end_inset - - -\layout Description - -eventq A handle for the event queue used to log the operations performed - on the memory region. - If this argument is -\family typewriter -PTL_EQ_NONE -\family default -, operations performed on this memory descriptor are not logged. - -\layout Subsection - -PtlMDAttach -\begin_inset LatexCommand \label{sec:mdattach} - -\end_inset - - -\layout LyX-Code - -int PtlMDAttach( ptl_handle_me_t match, -\newline - ptl_md_t mem_desc, -\newline - ptl_unlink_t unlink_op, -\newline - ptl_unlink_t unlink_nofit, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_unlink_t -\family default - are used to control whether an item is unlinked from a list. - The value -\family typewriter -PTL_UNLINK -\family default - enables unlinking. - The value -\family typewriter -PTL_RETAIN -\family default - disables unlinking. -\layout Standard - -The -\emph on -PtlMDAttach -\emph default - operation is used to create a memory descriptor and attach it to a match - list entry. - An error code is returned if this match list entry already has an associated - memory descriptor. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. 
- -\layout Description - -PTL_INUSE Indicates that -\family typewriter -match -\family default - already has a memory descriptor attached. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -match -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface associated with -\family typewriter -match -\family default -. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the match entry that the memory descriptor will be associated - with. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_op -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when it becomes - inactive, either because the operation threshold drops to zero or because - the maximum offset has been exceeded. - (Note, the check for unlinking a memory descriptor only occurs after - the completion of a successful operation. - If the threshold is set to zero during initialization or using -\emph on -PtlMDUpdate -\emph default -, the memory descriptor is -\series bold -not -\series default - unlinked.) 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_nofit -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when the space - remaining in the memory descriptor is not sufficient for a matching operation. - If an incoming message arrives at a memory descriptor that does - not have sufficient space and the -\series bold -PTL_MD_TRUNCATE -\series default - operation is not specified, the memory descriptor will be unlinked. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument can be NULL, in which case the handle will not be returned. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDBind -\begin_inset LatexCommand \label{sec:mdbind} - -\end_inset - - -\layout LyX-Code - -int PtlMDBind( ptl_handle_ni_t interface, -\newline - ptl_md_t mem_desc, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlMDBind -\emph default - operation is used to create a -\begin_inset Quotes eld -\end_inset - -free floating -\begin_inset Quotes erd -\end_inset - - memory descriptor, i.e., a memory descriptor that is not associated with - a match list entry. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface, -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INV_EQ Indicates that the event queue associated with -\family typewriter -mem_desc -\family default - is not valid. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the network interface with which the memory descriptor will - be associated. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. 
- -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument must be a valid address and cannot be NULL. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDUnlink -\begin_inset LatexCommand \label{sec:mdfree} - -\end_inset - - -\layout LyX-Code - -int PtlMDUnlink( ptl_handle_md_t mem_desc ); -\layout Standard -\noindent -The -\emph on -PtlMDUnlink -\emph default - function unlinks the memory descriptor from any match list entry it may - be linked to and releases the resources associated with a memory descriptor. - (This function does not free the memory region associated with the memory - descriptor.) This function also releases the resources associated with a - floating memory descriptor. - Only memory descriptors with no pending operations may be unlinked. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. -\layout Description - -PTL_MD_INUSE Indicates that -\family typewriter -mem_desc -\family default - has pending operations and cannot be unlinked. 
-\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to be released. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDUpdate -\begin_inset LatexCommand \label{sec:mdupdate} - -\end_inset - - -\layout LyX-Code - -int PtlMDUpdate( ptl_handle_md_t mem_desc, -\newline - ptl_md_t* old_md, -\newline - ptl_md_t* new_md, -\newline - ptl_handle_eq_t testq ); -\layout Standard -\noindent -The -\emph on -PtlMDUpdate -\emph default - function provides a conditional, atomic update operation for memory descriptors. - The memory descriptor identified by -\family typewriter -mem_desc -\family default - is only updated if the event queue identified by -\family typewriter -testq -\family default - is empty. - The intent is to only enable updates to the memory descriptor when no new - messages have arrived since the last time the queue was checked. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - - for an example of how this function can be used. 
-\layout Standard - -If -\family typewriter -new_md -\family default - is not NULL, the memory descriptor identified by mem_desc will be updated - to reflect the values in the structure pointed to by -\family typewriter -new_md -\family default - if -\family typewriter -testq -\family default - has the value -\family typewriter -PTL_EQ_NONE -\family default - or if the event queue identified by -\family typewriter -testq -\family default - is empty. - If -\family typewriter -old_md -\family default - is not NULL, the current value of the memory descriptor identified by -\family typewriter -mem_desc -\family default - is recorded in the location identified by -\family typewriter -old_md -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_NOUPDATE Indicates that the update was not performed because -\family typewriter -testq -\family default - was not empty. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. - -\layout Description - -PTL_ILL_MD Indicates that the value pointed to by -\family typewriter -new_md -\family default - is not a legal memory descriptor (e.g., the memory region specified by the - memory descriptor may be invalid). - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -testq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -new_md -\family default - or -\family typewriter -old_md -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to update. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -old_md -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -old_md -\family default - is not the value -\family typewriter -NULL -\family default -, the current value of the memory descriptor will be stored in the location - identified by -\family typewriter -old -\family default -_md. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -new_md -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -new_md -\family default - is not the value -\family typewriter -NULL -\family default -, this argument provides the new values for the memory descriptor, if the - update is performed. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -testq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for an event queue used to predicate the update. - If -\family typewriter -testq -\family default - is equal to -\family typewriter -PTL_EQ_NONE -\family default -, the update is performed unconditionally. - Otherwise, the update is performed if and only if -\family typewriter -testq -\family default - is empty. - If the update is not performed, the function returns the value -\family typewriter -PTL_NOUPDATE -\family default -. - (Note, the -\family typewriter -testq -\family default - argument does not need to be the same as the event queue associated with - the memory descriptor.) 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Standard - -The conditional update can be used to ensure that the memory descriptor - has not changed between the time it was examined and the time it is updated. - In particular, it is needed to support an MPI implementation where the - activity of searching an unexpected message queue and posting a receive - must be atomic. -\layout Section - -Events and Event Queues -\begin_inset LatexCommand \label{sec:eq} - -\end_inset - - -\layout Standard - -Event queues are used to log operations performed on memory descriptors. - They can also be used to hold acknowledgements for completed -\emph on -put -\emph default - operations and to note when the data specified in a -\emph on -put -\emph default - operation has been sent (i.e., when it is safe to reuse the buffer that holds - this data). - Multiple memory descriptors can share a single event queue. -\layout Standard - -In addition to the -\family typewriter -ptl_handle_eq_t -\family default - type, the Portals API defines two types associated with events: The -\family typewriter - -\newline -ptl_event_kind_t -\family default - type defines the kinds of events that can be stored in an event queue. - The -\family typewriter -ptl_event_t -\family default - type defines a structure that holds the information associated with an - event. -\layout Standard - -The Portals API also provides four functions for dealing with event queues: - The -\emph on -PtlEQAlloc -\emph default - function is used to allocate the API resources needed for an event queue, - the -\emph on -PtlEQFree -\emph default - function is used to release these resources, the -\emph on -PtlEQGet -\emph default - function can be used to get the next event from an event queue, and the - -\emph on -PtlEQWait -\emph default - function can be used to block a process (or thread) until an event queue - has at least one event. 
-\layout Subsection - -Kinds of Events -\begin_inset LatexCommand \label{sec:ek-type} - -\end_inset - - -\layout LyX-Code - -typedef enum { -\newline - PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL, -\newline - PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL, -\newline - PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL, -\newline - PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL, -\newline - PTL_EVENT_ACK, -\newline - PTL_EVENT_UNLINK -\newline -} ptl_event_kind_t; -\layout Standard -\noindent -The Portals API defines fourteen types of events that can be logged in an - event queue: -\layout Description - -PTL_EVENT_GET_START A remote -\emph on -get -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_GET_END A previously initiated -\emph on -get -\emph default - operation completed successfully. - This event is logged after the reply has been sent by the local node. - As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_GET_FAIL A previously initiated -\emph on -get -\emph default - operation completed unsuccessfully. - This event is logged after the reply has been sent by the local node. - As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_PUT_START A remote -\emph on -put -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should be considered - volatile until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_PUT_END A previously initiated -\emph on -put -\emph default - operation completed successfully. 
- The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_PUT_FAIL A previously initiated -\emph on -put -\emph default - operation completed unsuccessfully. - The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_REPLY_START A -\emph on -reply -\emph default - operation has been started on the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_END A previously initiated -\emph on -reply -\emph default - operation has completed successfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_FAIL A previously initiated -\emph on -reply -\emph default - operation has completed unsuccessfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_ACK An -\emph on -acknowledgement -\emph default - was received. - This event is logged when the acknowledgement is received. -\layout Description - -PTL_EVENT_SEND_START An outgoing -\emph on -send -\emph default - operation has been started. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_SEND_END A previously initiated -\emph on -send -\emph default - operation has completed successfully. - This event is logged after the entire buffer has been sent and it is safe - for the application to reuse the buffer. - -\layout Description - -PTL_EVENT_SEND_FAIL A previously initiated -\emph on -send -\emph default - operation has completed unsuccessfully. - The process can safely manipulate the memory or free the memory descriptor - once it sees this event. 
-\layout Description - -PTL_EVENT_UNLINK A memory descriptor associated with this event queue has - been automatically unlinked. - This event is not generated when a memory descriptor is explicitly unlinked - by calling -\shape italic -PtlMDUnlink -\shape default -. - This event does not decrement the threshold count. -\layout Subsection - -Event Ordering -\layout Standard - -The Portals API guarantees that when a process initiates two operations - on a remote process, the operations will be initiated on the remote process - in the same order that they were initiated on the original process. - As an example, if process A initiates two -\emph on -put -\emph default - operations, -\emph on -x -\emph default - and -\emph on -y -\emph default -, on process B, the Portals API guarantees that process A will receive the - -\family typewriter -PTL_EVENT_SEND_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default - in the same order that process B receives the -\family typewriter -PTL_EVENT_PUT_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default -. - Notice that the API does not guarantee that the start events will be delivered - in the same order that process A initiated the -\emph on -x -\emph default - and -\emph on -y -\emph default - operations. - If process A needs to ensure the ordering of these operations, it should - include code to wait for the initiation of -\emph on -x -\emph default - before it initiates -\emph on -y -\emph default -. -\layout Subsection - -Failure Notification -\layout Standard - -Operations may fail to complete successfully; however, unless the node itself - fails, every operation that is started will eventually complete. - While an operation is in progress, the memory associated with the operation - should not be viewed (in the case of a put or a reply) or altered (in the - case of a send or get). 
- Operation completion, whether successful or unsuccessful, is final. - That is, when an operation completes, the memory associated with the operation - will no longer be read or altered by the operation. - A network interface can use the -\family typewriter -ptl_ni_fail_t -\family default - to define more specific information regarding the failure of the operation - and record this information in the -\family typewriter -ni_fail_type -\family default - field of the event. -\layout Subsection - -The Event Type -\begin_inset LatexCommand \label{sec:event-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_event_kind_t type; -\newline - ptl_process_id_t initiator; -\newline - ptl_uid_t uid; -\layout LyX-Code - - ptl_pt_index_t portal; -\newline - ptl_match_bits_t match_bits; -\newline - ptl_size_t rlength; -\newline - ptl_size_t mlength; -\newline - ptl_size_t offset; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_hdr_data_t hdr_data; -\newline - ptl_seq_t link; -\newline - ptl_ni_fail_t ni_fail_type; -\newline - volatile ptl_seq_t sequence; -\newline -} ptl_event_t; -\layout Standard -\noindent -An event structure includes the following members: -\layout Description - -type Indicates the type of the event. - -\layout Description - -initiator The id of the initiator. - -\layout Description - -portal The Portal table index specified in the request. - -\layout Description - -match_bits A copy of the match bits specified in the request. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for more information on match bits. - -\layout Description - -rlength The length (in bytes) specified in the request. - -\layout Description - -mlength The length (in bytes) of the data that was manipulated by the operation. - For truncated operations, the manipulated length will be the number of - bytes specified by the memory descriptor (possibly with an offset) operation. 
- For all other operations, the manipulated length will be the length of - the requested operation. - -\layout Description - -offset Is the displacement (in bytes) into the memory region that the operation - used. - The offset can be determined by the operation (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - -) for a remote managed memory descriptor, or by the local memory descriptor - (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -). - -\layout Description - -md_handle Is the handle to the memory descriptor associated with the event. -\layout Description - -mem_desc Is the state of the memory descriptor immediately after the event - has been processed. - -\layout Description - -hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -). - -\layout Description - -link The -\emph on -link -\emph default - member is used to link -\family typewriter -START -\family default - events with the -\family typewriter -END -\family default - or -\family typewriter -FAIL -\family default - event that signifies completion of the operation. - The -\emph on -link -\emph default - member will be the same for the two events associated with an operation. - The link member is also used to link an -\family typewriter -UNLINK -\family default - event with the event that caused the memory descriptor to be unlinked. -\layout Description - -sequence The sequence number for this event. - Sequence numbers are unique to each event. -\layout Comment - -The -\emph on -sequence -\emph default - member is the last member and is volatile to support SMP implementations. - When an event structure is filled in, the -\emph on -sequence -\emph default - member should be written after all other members have been updated. 
- Moreover, a memory barrier should be inserted between the updating of other - members and the updating of the -\emph on -sequence -\emph default - member. -\layout Subsection - -PtlEQAlloc -\begin_inset LatexCommand \label{sec:eqalloc} - -\end_inset - - -\layout LyX-Code - -int PtlEQAlloc( ptl_handle_ni_t interface, -\newline - ptl_size_t count, -\newline - ptl_handle_eq_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlEQAlloc -\emph default - function is used to build an event queue. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - event queue. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface with which the event queue will be associated. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -count -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The number of events that can be stored in the event queue. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - event queue. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQFree -\begin_inset LatexCommand \label{sec:eqfree} - -\end_inset - - -\layout LyX-Code - -int PtlEQFree( ptl_handle_eq_t eventq ); -\layout Standard -\noindent -The -\emph on -PtlEQFree -\emph default - function releases the resources associated with an event queue. - It is up to the user to ensure that no memory descriptors are associated - with the event queue once it is freed. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. 
- -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the event queue to be released. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQGet -\begin_inset LatexCommand \label{sec:eqget} - -\end_inset - - -\layout LyX-Code - -int PtlEQGet( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQGet -\emph default - function is a nonblocking function that can be used to get the next event - in an event queue. - The event is removed from the queue. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. 
- -\layout Description - -PTL_EQ_EMPTY Indicates that -\family typewriter -eventq -\family default - is empty or another thread is waiting on -\emph on -PtlEQWait -\emph default -. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQWait -\begin_inset LatexCommand \label{sec:eqwait} - -\end_inset - - -\layout LyX-Code - -int PtlEQWait( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQWait -\emph default - function can be used to block the calling process (thread) until there - is an event in an event queue. - This function also returns the next event in the event queue and removes - this event from the queue. - This is the only blocking operation in the Portals 3.2 API. - In the event that multiple threads are waiting on the same event queue, - PtlEQWait is guaranteed to wake exactly one thread, but the order in which - they are awakened is not specified. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address. - queue handle. 
- -\layout Subsubsection - -Arguments -\layout Standard -\noindent - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue to wait on. - The calling process (thread) will be blocked until -\family typewriter -eventq -\family default - is not empty. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -The Access Control Table -\begin_inset LatexCommand \label{sec:ac} - -\end_inset - - -\layout Standard - -Processes can use the access control table to control which processes are - allowed to perform operations on Portal table entries. 
- Each communication interface has a Portal table and an access control table. - The access control table for the default interface contains an entry at - index zero that allows all processes with the same user id to communicate. - Entries in the access control table can be manipulated using the -\emph on -PtlACEntry -\emph default - function. -\layout Subsection - -PtlACEntry -\begin_inset LatexCommand \label{sec:acentry} - -\end_inset - - -\layout LyX-Code - -int PtlACEntry( ptl_handle_ni_t interface, -\newline - ptl_ac_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_uid_t user_id, -\newline - ptl_pt_index_t portal ); -\layout Standard -\noindent -The -\emph on -PtlACEntry -\emph default - function can be used to update an entry in the access control table for - an interface. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_AC_INV_INDEX Indicates that -\family typewriter -index -\family default - is not a valid access control table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_PT_INV_INDEX Indicates that -\family typewriter -portal -\family default - is not a valid Portal table index. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the interface to use. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index of the entry in the access control table to update. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -matchid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the process(es) that are allowed to perform operations. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -user_id -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the user that is allowed to perform operations. - The value -\family typewriter -PTL_UID_ANY -\family default - can be used to wildcard the user. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the Portal index(es) that can be used. - The value -\family typewriter -PTL_PT_INDEX_ANY -\family default - can be used to wildcard the Portal index. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Data Movement Operations -\begin_inset LatexCommand \label{sec:datamovement} - -\end_inset - - -\layout Standard - -The Portals API provides two data movement operations: -\emph on -PtlPut -\emph default - and -\emph on -PtlGet -\emph default -. -\layout Subsection - -PtlPut -\begin_inset LatexCommand \label{sec:put} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t; -\newline - -\newline -int PtlPut( ptl_handle_md_t mem_desc, -\newline - ptl_ack_req_t ack_req, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset, -\newline - ptl_hdr_data_t hdr_data ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ack_req_t -\family default - are used to control whether an acknowledgement should be sent when the - operation completes (i.e., when the data has been written to a memory descriptor - of the -\family typewriter -target -\family default - process). 
- The value -\family typewriter -PTL_ACK_REQ -\family default - requests an acknowledgement, the value -\family typewriter -PTL_NOACK_REQ -\family default - requests that no acknowledgement should be generated. -\layout Standard - -The -\emph on -PtlPut -\emph default - function initiates an asynchronous put operation. - There are several events associated with a put operation: initiation of - the send on the local node ( -\family typewriter -PTL_EVENT_SEND_START -\family default -), completion of the send on the local node ( -\family typewriter -PTL_EVENT_SEND_END -\family default - or -\family typewriter -PTL_EVENT_SEND_FAIL -\family default -), and, when the send completes successfully, the receipt of an acknowledgement - ( -\family typewriter -PTL_EVENT_ACK -\family default -) indicating that the operation was accepted by the target. - These events will be logged in the event queue associated with the memory - descriptor ( -\family typewriter -mem_desc -\family default -) used in the put operation. - Using a memory descriptor that does not have an associated event queue - results in these events being discarded. - In this case, the application must have another mechanism (e.g., a higher - level protocol) for determining when it is safe to modify the memory region - associated with the memory descriptor. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="8" columns="3"> -<features> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory to be sent. - If the memory descriptor has an event queue associated with it, it will - be used to record events when the message has been sent (PTL_EVENT_SEND_START, - PTL_EVENT_SEND_END). - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ack_req -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Controls whether an acknowledgement event is requested. - Acknowledgements are only sent when they are requested by the initiating - process -\series bold -and -\series default - the memory descriptor has an event queue -\series bold -and -\series default - the target memory descriptor enables them. 
- Allowed constants: -\family typewriter -PTL_ACK_REQ -\family default -, -\family typewriter -PTL_NOACK_REQ -\family default -. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -hdr_data -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -64 bits of user data that can be included in message header. 
- This data is written to an event queue entry at the target if an event - queue is present on the matching memory descriptor. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlGet -\begin_inset LatexCommand \label{sec:get} - -\end_inset - - -\layout LyX-Code - -int PtlGet( ptl_handle_md_t mem_desc, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset ); -\layout Standard -\noindent -The -\emph on -PtlGet -\emph default - function initiates a remote read operation. - There are two event pairs associated with a get operation: when the data - is sent from the remote node, a -\family typewriter -PTL_EVENT_GET_{START|END} -\family default - event pair is registered on the remote node; and when the data is returned - from the remote node, a -\family typewriter -PTL_EVENT_REPLY_{START|END} -\family default - event pair is registered on the local node. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="6" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory into which - the requested data will be received. - The memory descriptor can have an event queue associated with it to record - events, such as when the message receive has started ( -\family typewriter -PTL_EVENT_REPLY -\family default -_ -\family typewriter -START -\family default -). -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Summary -\layout Standard - - -\begin_inset LatexCommand \label{sec:summary} - -\end_inset - - We conclude this section by summarizing the names introduced by the Portals - 3.2 API. - We start by summarizing the names of the types introduced by the API. - This is followed by a summary of the functions introduced by the API. - This, in turn, is followed by a summary of the function return codes. - Finally, we conclude with a summary of the other constant values introduced - by the API. -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - - presents a summary of the types defined by the Portals API. - The first column in this table gives the type name, the second column gives - a brief description of the type, the third column identifies the section - where the type is defined, and the fourth column lists the functions that - have arguments of this type. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Types Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:types} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\noindent - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="25" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2in"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2.2in"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Sect -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Functions -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for an access control table -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" 
bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlACEntry, PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -acknowledgement request types -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlPut -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -kinds of events -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -information about events -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_seq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -event sequence number -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_any_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for any object -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard 
-\noindent -handles for event queues -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for memory descriptors -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert, - PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_me_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for match entries -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset 
Text - -\layout Standard - - -\family typewriter -ptl_handle_ni_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for network interfaces -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut, - PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_nid_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -node identifiers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlGetId,PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -process identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlGetId, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -user identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlGetUid, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -insertion position (before or after) -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -identifiers for network interfaces -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -match (and ignore) bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mb-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -memory descriptors -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ni_fail_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -network interface-specific failures -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -process identifiers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for Portal tables -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -sizes -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:size-t} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, 
PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for status registers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_value_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -values in status registers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -unlink options -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, 
PtlMEInsert, PtlMDAttach -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - - presents a summary of the functions defined by the Portals API. - The first column in this table gives the name for the function, the second - column gives a brief description of the operation implemented by the function, - and the third column identifies the section where the function is defined. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Functions Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:func} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="24" columns="3"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Operation -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Section -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlACEntry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - update an entry in an access control table -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQAlloc -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQGet -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the next event from an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQFree -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - release the resources for an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQWait -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - wait for a new event in an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - 
-\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlFini -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - shutdown the Portals API -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlGet -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - perform a get operation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlGetId -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the id for the current process -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlInit -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - initialize the Portals API -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - PtlMDAttach -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a memory descriptor and attach it to a match entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDBind -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a free-floating memory descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDUnlink -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - remove a memory descriptor from a list and release its resources -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDUpdate -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - update a memory descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - 
-\layout Standard - - PtlMEAttach -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a Portal table -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a free Portal table entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:attachany} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMEInsert -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a match entry and insert it in a list -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMEUnlink -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - remove a match entry from a list and release its resources -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - PtlNIDist -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the distance to another process -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIFini -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - shutdown a network interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIHandle -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the network interface handle for an object -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIInit -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - initialize a network interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIStatus -\end_inset -</cell> -<cell 
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - read a network interface status register -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlPut -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - perform a put operation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - - summarizes the return codes used by functions defined by the Portals API. - All of these constants are integer values. - The first column of this table gives the symbolic name for the constant, - the second column gives a brief description of the value, and the third - column identifies the functions that can return this value. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Function Return Codes for the Portals 3.2 API -\begin_inset LatexCommand \label{tab:retcodes} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="27" columns="3"> -<features> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2.6in"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Functions -\series default - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_AC_INV_INDEX -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid access control table index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_DROPPED -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> 
-\begin_inset Text - -\layout Standard - -at least one event has been dropped -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_EMPTY -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -no events available in an event queue -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -error during initialization or cleanup -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlInit, PtlFini -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ILL_MD -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -illegal memory descriptor values -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDBind, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_DUP -\end_inset -</cell> -<cell
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -duplicate initialization of an interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_INV -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -initialization of an invalid interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INUSE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -the ME already has an MD -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ASIZE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid access control table size -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_EQ -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - 
-invalid event queue handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDUpdate, PtlEQFree, PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_HANDLE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_MD -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid memory descriptor handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDUnlink, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ME -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid match entry handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_NI -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid network interface handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset 
Text - -\layout Standard -\noindent -PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PROC -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid process identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PTINDEX -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid Portal table index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_REG -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid status register -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_SR_INDX -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid status register index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - 
PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ML_TOOLONG -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match list too long -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach, PtlMEInsert -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_INUSE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -MD has pending operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMDUnlink -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOINIT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -uninitialized API -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\emph default -, except PtlInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOSPACE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insufficient memory -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOUPDATE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - no update was performed -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_FULL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -Portal table is full -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_OK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - success -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_SEGV -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -addressing violation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate, - PtlEQAlloc, PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand 
\ref{tab:oconsts} - -\end_inset - - summarizes the remaining constant values introduced by the Portals API. - The first column in this table presents the symbolic name for the constant, - the second column gives a brief description of the value, the third column - identifies the type for the value, the fourth column identifies the section - in which the value is introduced, and the fifth column identifies the - sections in which the value is referenced. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Other Constants Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:oconsts} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="36" columns="5"> -<features> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Base type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Intr. -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Ref. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ACK_REQ -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -request an acknowledgement -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_NONE -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -a NULL event queue handle -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout 
Standard - - -\family typewriter -PTL_EVENT_GET_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter 
-ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset 
LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -reply event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout 
Standard - -reply event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -reply event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - 
-\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_UNLINK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -unlink event -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for process id fields -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for node id fields -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_nid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for user id -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> 
-</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_IFACE_DEFAULT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -default interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_AFTER -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insert after -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_BEFORE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insert before -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family 
default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_ACK_DISABLE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to disable acknowledgements -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_MANAGE_REMOTE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable the use of remote offsets -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:get} - 
-\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_GET -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable get operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_PUT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable put operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_THRESH_INF -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -infinite threshold for a memory descriptor -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_TRUNCATE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable truncation of a request -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOACK_REQ -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -request no acknowledgement -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_INDEX_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for Portal indexes -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_RETAIN -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -disable unlinking -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_SR_DROP_COUNT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -index for the dropped count register -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset -</cell> -<cell 
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UNLINK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -enable unlinking -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Chapter - -The Semantics of Message Transmission -\begin_inset LatexCommand \label{sec:semantics} - -\end_inset - - -\layout Standard - -The Portals API uses four types of messages: put requests, acknowledgements, - get requests, and replies. - In this chapter, we describe the information passed on the wire for each - type of message. - We also describe how this information is used to process incoming messages. -\layout Section - -Sending Messages -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:put-wire} - -\end_inset - - summarizes the information that is transmitted for a put request. 
- The first column provides a descriptive name for the information, the second - column provides the type for this information, the third column identifies - the source of the information, and the fourth column provides additional - notes. - Most information that is transmitted is obtained directly from the -\emph on -PtlPut -\emph default - operation. - Notice that the handle for the memory descriptor used in the -\emph on -PtlPut -\emph default - operation is transmitted even though this value cannot be interpreted by - the target. - A value of anything other than -\family typewriter -PTL_MD_NONE -\family default - is interpreted as a request for an acknowledgement. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Put Request -\begin_inset LatexCommand \label{tab:put-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="12" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlPut -\emph default - arg -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -indicates a put request -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -user -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell 
alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - 
-match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -no ack if -\family typewriter -PTL_MD_NONE -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -length -\family default - member -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -data -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family roman -\emph on -bytes -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -start -\family default - and -\family typewriter -length -\family default - members -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:ack-wire} - -\end_inset - - summarizes the information transmitted in an acknowledgement. - Most of the information is simply echoed from the put request. - Notice that the initiator and target are obtained directly from the put - request, but are swapped in generating the acknowledgement. - The only new piece of information in the acknowledgement is the manipulated - length which is determined as the put request is satisfied. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in an Acknowledgement -\begin_inset LatexCommand \label{tab:ack-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="10" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Put Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - 
indicates an acknowledgement -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - match bits -\end_inset 
-</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - requested length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - length -\end_inset -</cell> -<cell 
alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - manipulated length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - obtained from the operation -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:get-wire} - -\end_inset - - summarizes the information that is transmitted for a get request. - Like the information transmitted in a put request, most of the information - transmitted in a get request is obtained directly from the -\emph on -PtlGet -\emph default - operation. - Unlike put requests, get requests do not include the event queue handle. - In this case, the reply is generated whenever the operation succeeds and - the memory descriptor must not be unlinked until the reply is received. - As such, there is no advantage to explicitly sending the event queue handle. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Get Request -\begin_inset LatexCommand \label{tab:get-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="11" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlGet -\emph default - argument -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - 
-\layout Standard - -indicates a get operation -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -user -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset 
-</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -length -\family default - member -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:reply-wire} - -\end_inset - - summarizes the information transmitted in a reply. - Like an acknowledgement, most of the information is simply echoed from - the get request. - The initiator and target are obtained directly from the get request, but - are swapped in generating the acknowledgement. 
- The only new pieces of information in the reply are the manipulated length - and the data, which are determined as the get request is satisfied. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Reply -\begin_inset LatexCommand \label{tab:reply-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="11" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Get Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - 
-\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -indicates an acknowledgement -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -requested length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -manipulated length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -data -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\emph on -bytes -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Section - -Receiving Messages -\begin_inset LatexCommand \label{sec:receiving} - -\end_inset - - -\layout Standard - -When an incoming message arrives on a network interface, the communication - system first checks that the target process identified in the request is - a valid process that has initialized the network interface (i.e., that the - target process has a valid Portal table). 
- If this test fails, the communication system discards the message and increment -s the dropped message count for the interface. - The remainder of the processing depends on the type of the incoming message. - Put and get messages are subject to access control checks and translation - (searching a match list), while acknowledgement and reply messages bypass - the access control checks and the translation step. -\layout Standard - -Acknowledgement messages include a handle for the memory descriptor used - in the original -\emph on -PtlPut -\emph default - operation. - This memory descriptor will identify the event queue where the event should - be recorded. - Upon receipt of an acknowledgement, the runtime system only needs to confirm - that the memory descriptor and event queue still exist and that there is - space for another event. - Should any of these conditions fail, the message is simply discarded - and the dropped message count for the interface is incremented. - Otherwise, the system builds an acknowledgement event from the information - in the acknowledgement message and adds it to the event queue. -\layout Standard - -Reception of reply messages is also relatively straightforward. - Each reply message includes a handle for a memory descriptor. - If this descriptor exists, it is used to receive the message. - A reply message will be dropped if the memory descriptor identified in - the request doesn't exist. - In this case, the dropped message count for the interface is - incremented. - This is the only reason for dropping reply messages. - Every memory descriptor accepts and truncates incoming reply messages, - eliminating the other potential reasons for rejecting a reply message. -\layout Standard - -The critical step in processing an incoming put or get request involves - mapping the request to a memory descriptor. - This step starts by using the Portal index in the incoming request to identify - a list of match entries. 
- This list of match entries is searched in order until a match entry is - found whose match criteria matches the match bits in the incoming request - and whose memory descriptor accepts the request. -\layout Standard - -Because acknowledge and reply messages are generated in response to requests - made by the process receiving these messages, the checks performed by the - runtime system for acknowledgements and replies are minimal. - In contrast, put and get messages are generated by remote processes and - the checks performed for these messages are more extensive. - Incoming put or get messages may be rejected because: -\layout Itemize - -the Portal index supplied in the request is not valid; -\layout Itemize - -the cookie supplied in the request is not a valid access control entry; - -\layout Itemize - -the access control entry identified by the cookie does not match the identifier - of the requesting process; -\layout Itemize - -the access control entry identified by the access control entry does not - match the Portal index supplied in the request; or -\layout Itemize - -the match bits supplied in the request do not match any of the match entries - with a memory descriptor that accepts the request. - -\layout Standard - -In all cases, if the message is rejected, the incoming message is discarded - and the dropped message count for the interface is incremented. -\layout Standard - -A memory descriptor may reject an incoming request for any of the following - reasons: -\layout Itemize - -the -\family typewriter -PTL_MD_PUT -\family default - or -\family typewriter -PTL_MD_GET -\family default - option has not been enabled and the operation is put or get, respectively; - -\layout Itemize - -the length specified in the request is too long for the memory descriptor - and the -\family typewriter -PTL_MD_TRUNCATE -\family default - option has not been enabled. 
-\layout Chapter - -Examples -\begin_inset LatexCommand \label{sec:examples} - -\end_inset - - -\layout Comment - -The examples presented in this chapter have not been updated to reflect - the current API. -\layout Standard - -In this section we present several example to illustrate expected usage - patterns for the Portals 3.2 API. - The first example describes how to implement parallel servers using the - features of the Portals 3.2 API. - This example covers the access control list and the use of remote managed - offsets. - The second example presents an approach to dealing with dropped requests. - This example covers aspects of match lists and memory descriptors. - The final example covers message reception in MPI. - This example illustrates more sophisticated uses of matching and a procedure - to update a memory descriptor. -\layout Section - -Parallel File Servers -\begin_inset LatexCommand \label{sec:expfs} - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:file} - -\end_inset - - illustrates the logical structure of a parallel file server. - In this case, the parallel server consists of four servers that stripe - application data across four disks. - We would like to present applications with the illusion that the file server - is a single entity. - We will assume that all of the processes that constitute the parallel server - have the same user id. 
-\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename file.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 196pt - lyxheight 147pt -\end_inset - - -\layout Caption - -Parallel File Server -\begin_inset LatexCommand \label{fig:file} - -\end_inset - - -\end_inset - - -\layout Standard - -When an application establishes a connection to the parallel file server, - it will allocate a Portal and access control list entry for communicating - with the server. - The access control list entry will include the Portal and match any process - in the parallel file server's, so all of the file server processes will - have access to the portal. - The Portal information and access control entry will be sent to the file - server at this time. - If the application and server need to have multiple, concurrent I/O operations, - they can use additional portals or match entries to keep the operations - from interfering with one another. -\layout Standard - -When an application initiates an I/O operation, it first builds a memory - descriptor that describes the memory region involved in the operation. - This memory descriptor will enable the appropriate operation (put for read - operations and get for write operations) and enable the use of remote offsets - (this lets the servers decide where their data should be placed in the - memory region). - After creating the memory descriptor and linking it into the appropriate - Portal entry, the application sends a read or write request (using -\emph on -PtlPut -\emph default -) to one of the file server processes. - The file server processes can then use put or get operations with the appropria -te offsets to fill or retrieve the contents of the application's buffer. 
- To know when the operation has completed, the application can add an event - queue to the memory descriptor and add up the lengths of the remote operations - until the sum is the size of the requested I/O operation. -\layout Section - -Dealing with Dropped Requests -\begin_inset LatexCommand \label{sec:exdrop} - -\end_inset - - -\layout Standard - -If a process does not anticipate unexpected requests, they will be discarded. - Applications using the Portals API can query the dropped count for the - interface to determine the number of requests that have been dropped (see - Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - While this approach minimizes resource consumption, it does not provide - information that might be critical in debugging the implementation of a - higher level protocol. -\layout Standard - -To keep track of more information about dropped requests, we use a memory - descriptor that truncates each incoming request to zero bytes and logs - the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - operations in an event queue. - Note that the operations are not dropped in the Portals sense, because - the operation succeeds. -\layout Standard - -The following code fragment illustrates an implementation of this approach. - In this case, we assume that a thread is launched to execute the function - -\family typewriter -watch_drop -\family default -. - This code starts by building an event queue to log truncated operations - and a memory descriptor to truncate the incoming requests. - This example only captures -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests for a single portal. - In a more realistic situation, the memory descriptor would be appended - to the match list for every portal. 
- We also assume that the thread is capable of keeping up with the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests. - If this is not the case, we could use a finite threshold on the memory - descriptor to capture the first few dropped requests. -\layout LyX-Code - - -\size small -#include <stdio.h> -\newline -#include <stdlib.h> -\newline -#include <portals.h> -\newline - -\newline -#define DROP_SIZE 32 /* number of dropped requests to track */ -\newline - -\newline -int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) { -\newline - ptl_handle_eq_t drop_events; -\newline - ptl_event_t event; -\newline - ptl_handle_md_t drop_em; -\newline - ptl_md_t drop_desc; -\newline - ptl_process_id_t any_proc; -\newline - ptl_handle_me_t match_any; -\newline - -\newline - /* create the event queue */ -\newline - if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the event queue -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* build a match entry */ -\newline - any_proc.nid = PTL_ID_ANY; -\newline - any_proc.pid = PTL_ID_ANY; -\newline - PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN, -\newline - &match_any ); -\newline - -\newline - /* create the memory descriptor */ -\newline - drop_desc.start = NULL; -\newline - drop_desc.length = 0; -\newline - drop_desc.threshold = PTL_MD_THRESH_INF; -\newline - drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE; -\newline - drop_desc.user_ptr = NULL; -\newline - drop_desc.eventq = drop_events; -\newline - if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the memory descriptor -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* watch for "dropped" requests */ -\newline - while( 1 ) { -\newline - if( PtlEQWait( drop_events, &event ) != PTL_OK ) break; -\newline - fprintf( 
stderr, "Dropped request from gid = event.initiator.gid, - event.initiator.rid ); -\newline - } -\newline -} -\layout Section - -Message Transmission in MPI -\begin_inset LatexCommand \label{sec:exmpi} - -\end_inset - - -\layout Standard - -We conclude this section with a fairly extensive example that describes - an approach to implementing message transmission for MPI. - Like many MPI implementations, we distinguish two message transmission - protocols: a short message protocol and a long message protocol. - We use the constant -\family typewriter -MPI_LONG_LENGTH -\family default - to determine the size of a long message. -\layout Standard - -For small messages, the sender simply sends the message and presumes that - the message will be received (i.e., the receiver has allocated a memory region - to receive the message body). - For large messages, the sender also sends the message, but does not presume - that the message body will be saved. - Instead, the sender builds a memory descriptor for the message and enables - get operations on this descriptor. - If the target does not save the body of the message, it will record an - event for the put operation. - When the process later issues a matching MPI receive, it will perform a - get operation to retrieve the body of the message. -\layout Standard - -To facilitate receive side matching based on the protocol, we use the most - significant bit in the match bits to indicate the protocol: 1 for long - messages and 0 for short messages. -\layout Standard - -The following code presents a function that implements the send side of - the protocol. - The global variable -\family typewriter -EndGet -\family default - is the last match entry attached to the Portal index used for posting long - messages. - This entry does not match any incoming requests (i.e., the memory descriptor - rejects all get operations) and is built during initialization of the MPI - library. 
- The other global variable, -\family typewriter -MPI_NI -\family default -, is a handle for the network interface used by the MPI implementation. -\layout LyX-Code - - -\size small -extern ptl_handle_me_t EndGet; -\newline -extern ptl_handle_ni_t MPI_NI; -\newline - -\newline -void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq, -\newline - ptl_process_id target, ptl_match_bits_t match ) -\newline -{ -\newline - ptl_handle_md_t send_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_ack_req_t want_ack; -\newline - -\newline - mem_desc.start = buf; -\newline - mem_desc.length = len; -\newline - mem_desc.threshold = 1; -\newline - mem_desc.options = PTL_MD_GET_OP; -\newline - mem_desc.user_ptr = data; -\newline - mem_desc.eventq = eventq; -\newline - -\newline - if( len >= MPI_LONG_LENGTH ) { -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - /* add a match entry to the end of the get list */ -\newline - PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet, - &me_handle ); -\newline - PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL ); -\newline - -\newline - /* we want an ack for long messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a long message - */ -\newline - match |= 1<<63; -\newline - } else { -\newline - /* we don't want an ack for short messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a short message - */ -\newline - match &= ~(1<<63); -\newline - } -\newline - -\newline - /* create a memory descriptor and send it */ -\newline - PtlMDBind( MPI_NI, mem_desc, &send_handle ); -\newline - PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match, - 0 ); -\newline -} -\layout Standard - -The -\emph on -MPISend -\emph default - function returns as soon as the message has been scheduled for transmission. 
- The event queue argument, -\family typewriter -eventq -\family default -, can be used to determine the disposition of the message. - Assuming that -\family typewriter -eventq -\family default - is not -\family typewriter -PTL_EQ_NONE -\family default -, a -\family typewriter -PTL_EVENT_SENT -\family default - event will be recorded for each message as the message is transmitted. - For small messages, this is the only event that will be recorded in -\family typewriter -eventq -\family default -. - In contrast, long messages include an explicit request for an acknowledgement. - If the -\family typewriter -target -\family default - process has posted a matching receive, the acknowledgement will be sent - as the message is received. - If a matching receive has not been posted, the message will be discarded - and no acknowledgement will be sent. - When the -\family typewriter -target -\family default - process later issues a matching receive, the receive will be translated - into a get operation and a -\family typewriter -PTL_EVENT_GET -\family default - event will be recorded in -\family typewriter -eventq -\family default -. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:mpi} - -\end_inset - - illustrates the organization of the match list used for receiving MPI messages. - The initial entries (not shown in this figure) would be used to match the - MPI receives that have been preposted by the application. - The preposted receives are followed by a match entry, -\emph on -RcvMark -\emph default -, that marks the boundary between preposted receives and the memory descriptors - used for -\begin_inset Quotes eld -\end_inset - -unexpected -\begin_inset Quotes erd -\end_inset - - messages. 
- The -\emph on -RcvMark -\emph default - entry is followed by a small collection of match entries that match unexpected - -\begin_inset Quotes eld -\end_inset - -short -\begin_inset Quotes erd -\end_inset - - messages, i.e., messages that have a 0 in the most significant bit of their - match bits. - The memory descriptors associated with these match entries will append - the incoming message to the associated memory descriptor and record an - event in an event queue for unexpected messages. - The unexpected short message matching entries are followed by a match entry - that will match messages that were not matched by the preceding match entries, - i.e., the unexpected long messages. - The memory descriptor associated with this match entry truncates the message - body and records an event in the event queue for unexpected messages. - Note that of the memory descriptors used for unexpected messages share - a common event queue. - This makes it possible to process the unexpected messages in the order - in which they arrived, regardless of. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename mpi.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 389pt - lyxheight 284pt -\end_inset - - -\layout Caption - -Message Reception in MPI -\begin_inset LatexCommand \label{fig:mpi} - -\end_inset - - -\end_inset - - -\layout Standard - -When the local MPI process posts an MPI receive, we must first search the - events unexpected message queue to see if a matching message has already - arrived. - If no matching message is found, a match entry for the receive is inserted - before the -\emph on -RcvMark -\emph default - entry--after the match entries for all of the previously posted receives - and before the match entries for the unexpected messages. 
- This ensures that preposted receives are matched in the order that they - were posted (a requirement of MPI). - -\layout Standard - -While this strategy respects the temporal semantics of MPI, it introduces - a race condition: a matching message might arrive after the events in the - unexpected message queue have been searched, but before the match entry - for the receive has been inserted in the match list. - -\layout Standard - -To avoid this race condition we start by setting the -\family typewriter -threshold -\family default - of the memory descriptor to 0, making the descriptor inactive. - We then insert the match entry into the match list and proceed to search - the events in the unexpected message queue. - A matching message that arrives as we are searching the unexpected message - queue will not be accepted by the memory descriptor and, if not matched - by an earlier match list element, will add an event to the unexpected message - queue. - After searching the events in the unexpected message queue, we update the - memory descriptor, setting the threshold to 1 to activate the memory descriptor. - This update is predicated by the condition that the unexpected message - queue is empty. - We repeat the process of searching the unexpected message queue until the - update succeeds. -\layout Standard - -The following code fragment illustrates this approach. - Because events must be removed from the unexpected message queue to be - examined, this code fragment assumes the existence of a user managed event - list, -\family typewriter -Rcvd -\family default -, for the events that have already been removed from the unexpected message - queue. - In an effort to keep the example focused on the basic protocol, we have - omitted the code that would be needed to manage the memory descriptors - used for unexpected short messages. - In particular, we simply leave messages in these descriptors until they - are received by the application. 
- In a robust implementation, we would introduce code to ensure that short - unexpected messages are removed from these memory descriptors so that they - can be re-used. -\layout LyX-Code - - -\size small -extern ptl_handle_eq_t UnexpQueue; -\newline -extern ptl_handle_me_t RcvMark; -\newline -extern ptl_handle_me_t ShortMatch; -\newline - -\newline -typedef struct event_list_tag { -\newline - ptl_event_t event; -\newline - struct event_list_tag* next; -\newline -} event_list; -\newline - -\newline -extern event_list Rcvd; -\newline - -\newline -void AppendRcvd( ptl_event_t event ) -\newline -{ -\newline - /* append an event onto the Rcvd list */ -\newline -} -\newline - -\newline -int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi -ts_t match, -\newline - ptl_match_bits_t ignore, ptl_event_t *event ) -\newline -{ -\newline - /* Search the Rcvd event queue, looking for a message that matches the - requested message. -\newline - * If one is found, remove the event from the Rcvd list and return it. 
- */ -\newline -} -\newline - -\newline -typedef enum { RECEIVED, POSTED } receive_state; -\newline - -\newline -receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event, - ptl_md_t md_buf ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - if( event.rlength >= MPI_LONG_LENGTH ) { -\newline - PtlMDBind( MPI_NI, md_buf, &md_handle ); -\newline - PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX, - md_handle ); -\newline - return POSTED; -\newline - } else { -\newline - /* copy the message */ -\newline - if( event.mlength < *length ) *length = event.mlength; -\newline - memcpy( buf, (char*)event.md_desc.start+event.offset, *length ); -\newline - return RECEIVED; -\newline - } -\newline -} -\newline - -\newline -receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle -_eq_t eventq, -\newline - ptl_process_id_t sender, ptl_match_bits_t match, - ptl_match_bits_t ignore ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_handle_me_t me_handle; -\newline - ptl_event_t event; -\newline - -\newline - /* build a memory descriptor for the receive */ -\newline - md_buf.start = buf; -\newline - md_buf.length = *len; -\newline - md_buf.threshold = 0; /* temporarily disabled */ -\newline - md_buf.options = PTL_MD_PUT_OP; -\newline - md_buf.user_ptr = MPI_data; -\newline - md_buf.eventq = eventq; -\newline - -\newline - /* see if we have already received the message */ -\newline - if( SearchRcvd(buf, len, sender, match, ignore, &event) ) -\newline - return CopyMsg( buf, len, event, md_buf ); -\newline - -\newline - /* create the match entry and attach the memory descriptor */ -\newline - PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark, - &me_handle); -\newline - PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle ); -\newline - -\newline - md_buf.threshold = 1; -\newline - do -\newline - if( 
PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) { -\newline - if( MPIMatch(event, match, ignore, sender) ) { -\newline - return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset, - md_buf ); -\newline - } else { -\newline - AppendRcvd( event ); -\newline - } -\newline - } -\newline - while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE - ); -\newline - return POSTED; -\newline -} -\layout Chapter* - -Acknowledgments -\layout Standard - -Several people have contributed to the philosophy, design, and implementation - of the Portals message passing architecture as it has evolved. - We acknowledge the following people for their contributions: Al Audette, - Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike - Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke, - Dave van Dresser, Lee Ward, and Stephen Wheat. - -\layout Standard - - -\begin_inset LatexCommand \BibTeX[ieee]{portals3} - -\end_inset - - -\the_end diff --git a/lnet/doc/put.fig b/lnet/doc/put.fig deleted file mode 100644 index 5235b6d7880836321ca385bdc4bedfa30c92e30b..0000000000000000000000000000000000000000 --- a/lnet/doc/put.fig +++ /dev/null @@ -1,32 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1350 900 2175 1200 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1275 2700 1725 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 1200 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2699 1788 899 1938 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001 -4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001 -4 1 0 100 0 0 10 0.0000 
0 135 1170 1800 2175 Acknowledgement\001 -4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 diff --git a/lnet/include/.cvsignore b/lnet/include/.cvsignore deleted file mode 100644 index 94d3790678c916e364e1ab73c431e7e3c2d88b8b..0000000000000000000000000000000000000000 --- a/lnet/include/.cvsignore +++ /dev/null @@ -1,6 +0,0 @@ -config.h -stamp-h -stamp-h1 -stamp-h.in -Makefile -Makefile.in diff --git a/lnet/include/Makefile.am b/lnet/include/Makefile.am deleted file mode 100644 index 006180b83629fa90d694582309745038d9c35e16..0000000000000000000000000000000000000000 --- a/lnet/include/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = libcfs lnet - -EXTRA_DIST = cygwin-ioctl.h diff --git a/lnet/include/cygwin-ioctl.h b/lnet/include/cygwin-ioctl.h deleted file mode 100644 index 8a33957adbd9870e3ef354cdc7c7da3c68fdb98d..0000000000000000000000000000000000000000 --- a/lnet/include/cygwin-ioctl.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * linux/ioctl.h for Linux by H.H. Bergman. - */ - -#ifndef _ASMI386_IOCTL_H -#define _ASMI386_IOCTL_H - -/* ioctl command encoding: 32 bits total, command in lower 16 bits, - * size of the parameter structure in the lower 14 bits of the - * upper 16 bits. - * Encoding the size of the parameter structure in the ioctl request - * is useful for catching programs compiled with old versions - * and to avoid overwriting user space outside the user buffer area. - * The highest 2 bits are reserved for indicating the ``access mode''. - * NOTE: This limits the max parameter size to 16kB -1 ! - */ - -/* - * The following is for compatibility across the various Linux - * platforms. The i386 ioctl numbering scheme doesn't really enforce - * a type field. De facto, however, the top 8 bits of the lower 16 - * bits are indeed used as a type field, so we might just as well make - * this explicit here. 
Please be sure to use the decoding macros - * below from now on. - */ -#undef _IO -#undef _IOR -#undef _IOW -#undef _IOC -#undef IOC_IN -#undef IOC_OUT - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 14 -#define _IOC_DIRBITS 2 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - -#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits. - */ -#define _IOC_NONE 0U -#define _IOC_WRITE 1U -#define _IOC_READ 2U - -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode ioctl numbers.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* ...and for the drivers/sound files... 
*/ - -#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) -#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) -#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) -#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) -#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) - -#endif /* _ASMI386_IOCTL_H */ diff --git a/lnet/include/libcfs/.cvsignore b/lnet/include/libcfs/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/libcfs/Makefile.am b/lnet/include/libcfs/Makefile.am deleted file mode 100644 index 472d0ae50268d875fd4f2d3fca0c330fa302be7e..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/Makefile.am +++ /dev/null @@ -1,9 +0,0 @@ -SUBDIRS := linux -if DARWIN -SUBDIRS += darwin -endif -DIST_SUBDIRS := $(SUBDIRS) - -EXTRA_DIST := curproc.h kp30.h libcfs.h list.h lltrace.h \ - portals_utils.h types.h user-lock.h user-prim.h user-time.h \ - user-tcpip.h user-bitops.h bitmap.h diff --git a/lnet/include/libcfs/bitmap.h b/lnet/include/libcfs/bitmap.h deleted file mode 100644 index 7f6189a36503eadbebbfeb1790795c5560dd0d81..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/bitmap.h +++ /dev/null @@ -1,80 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2007 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#ifndef _LIBCFS_BITMAP_H_ -#define _LIBCFS_BITMAP_H_ - - -typedef struct { - int size; - unsigned long data[0]; -} bitmap_t; - -#define CFS_BITMAP_SIZE(nbits) \ - (((nbits/BITS_PER_LONG)+1)*sizeof(long)+sizeof(bitmap_t)) - -static inline -bitmap_t *ALLOCATE_BITMAP(int size) -{ - bitmap_t *ptr; - - OBD_ALLOC(ptr, CFS_BITMAP_SIZE(size)); - if (ptr == NULL) - RETURN(ptr); - - ptr->size = size; - - RETURN (ptr); -} - -#define FREE_BITMAP(ptr) OBD_FREE(ptr, CFS_BITMAP_SIZE(ptr->size)) - -static inline -void cfs_bitmap_set(bitmap_t *bitmap, int nbit) -{ - set_bit(nbit, bitmap->data); -} - -static inline -void cfs_bitmap_clear(bitmap_t *bitmap, int nbit) -{ - clear_bit(nbit, bitmap->data); -} - -static inline -int cfs_bitmap_check(bitmap_t *bitmap, int nbit) -{ - return test_bit(nbit, bitmap->data); -} - -/* return 0 is bitmap has none set bits */ -static inline -int cfs_bitmap_check_empty(bitmap_t *bitmap) -{ - return find_first_bit(bitmap->data, bitmap->size) == bitmap->size; -} - -#define cfs_foreach_bit(bitmap, pos) \ - for((pos)=find_first_bit((bitmap)->data, bitmap->size); \ - (pos) < (bitmap)->size; \ - (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos))) - -#endif diff --git a/lnet/include/libcfs/curproc.h b/lnet/include/libcfs/curproc.h deleted file mode 100644 index 6495c661d31407d7151650b65cdb07a4c7fbd3d3..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/curproc.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre curproc API declaration - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - * Public License for more details. You should have received a copy of the GNU - * General Public License along with Lustre; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#ifndef __LIBCFS_CURPROC_H__ -#define __LIBCFS_CURPROC_H__ - -#ifdef __KERNEL__ -/* - * Portable API to access common characteristics of "current" UNIX process. - * - * Implemented in portals/include/libcfs/<os>/ - */ -uid_t cfs_curproc_uid(void); -gid_t cfs_curproc_gid(void); -uid_t cfs_curproc_fsuid(void); -gid_t cfs_curproc_fsgid(void); -pid_t cfs_curproc_pid(void); -int cfs_curproc_groups_nr(void); -int cfs_curproc_is_in_groups(gid_t group); -void cfs_curproc_groups_dump(gid_t *array, int size); -mode_t cfs_curproc_umask(void); -char *cfs_curproc_comm(void); - - -/* - * Plus, platform-specific constant - * - * CFS_CURPROC_COMM_MAX, - * - * and opaque scalar type - * - * cfs_kernel_cap_t - */ -cfs_kernel_cap_t cfs_curproc_cap_get(void); -void cfs_curproc_cap_set(cfs_kernel_cap_t cap); -#endif - -/* __LIBCFS_CURPROC_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/darwin/.cvsignore b/lnet/include/libcfs/darwin/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/libcfs/darwin/Makefile.am 
b/lnet/include/libcfs/darwin/Makefile.am deleted file mode 100644 index f2f217a2deb6a5b140ba1e17f7ad0093f74f29a9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h \ - darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h \ - darwin-lock.h darwin-sync.h darwin-tcpip.h kp30.h diff --git a/lnet/include/libcfs/darwin/darwin-fs.h b/lnet/include/libcfs/darwin/darwin-fs.h deleted file mode 100644 index da613ba1f2186260752a86de7555a632f813cd8a..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-fs.h +++ /dev/null @@ -1,193 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Implementation of standard file system interfaces for XNU kernel. - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ -#ifndef __LIBCFS_DARWIN_FS_H__ -#define __LIBCFS_DARWIN_FS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - -#include <sys/types.h> -#include <sys/systm.h> - -#include <sys/kernel.h> -#include <sys/file.h> -#include <sys/time.h> -#include <sys/filedesc.h> -#include <sys/mount.h> -#include <sys/stat.h> -#include <sys/sysctl.h> -#include <sys/ubc.h> -#include <sys/mbuf.h> -#include <sys/namei.h> -#include <sys/fcntl.h> -#include <sys/lockf.h> -#include <stdarg.h> - -#include <mach/mach_types.h> -#include <mach/time_value.h> -#include <kern/clock.h> -#include <sys/param.h> -#include <IOKit/system.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/darwin/darwin-mem.h> -#include <libcfs/list.h> - -/* - * File operating APIs in kernel - */ -#ifdef __DARWIN8__ -/* - * Kernel file descriptor - */ -typedef struct cfs_kern_file { - int f_flags; - vnode_t f_vp; - vfs_context_t f_ctxt; -} cfs_file_t; - -#else - -typedef struct file cfs_file_t; - -#endif - -int kern_file_size(cfs_file_t *fp, off_t *size); -#define cfs_filp_size(fp) \ - ({ \ - off_t __size; \ - kern_file_size((fp), &__size); \ - __size; \ - }) -#define cfs_filp_poff(fp) (NULL) - -cfs_file_t *kern_file_open(const char *name, int flags, int mode, int *err); -int kern_file_close(cfs_file_t *fp); -int kern_file_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); -int kern_file_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); -int kern_file_sync(cfs_file_t *fp); - -#define cfs_filp_open(n, f, m, e) kern_file_open(n, f, m, e) -#define cfs_filp_close(f) kern_file_close(f) -#define cfs_filp_read(f, b, n, p) kern_file_read(f, b, n, p) -#define cfs_filp_write(f, b, n, p) kern_file_write(f, b, n, p) -#define cfs_filp_fsync(f) kern_file_sync(f) - -int ref_file(cfs_file_t *fp); -int rele_file(cfs_file_t *fp); -int file_count(cfs_file_t *fp); -#define cfs_get_file(f) ref_file(f) -#define cfs_put_file(f) rele_file(f) -#define cfs_file_count(f) file_count(f) - -#define CFS_INT_LIMIT(x) (~((x)1 << 
(sizeof(x)*8 - 1))) -#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t) - -typedef struct flock cfs_flock_t; -#define cfs_flock_type(fl) ((fl)->l_type) -#define cfs_flock_set_type(fl, type) do { (fl)->l_type = (type); } while(0) -#define cfs_flock_pid(fl) ((fl)->l_pid) -#define cfs_flock_set_pid(fl, pid) do { (fl)->l_pid = (pid); } while(0) -#define cfs_flock_start(fl) ((fl)->l_start) -#define cfs_flock_set_start(fl, start) do { (fl)->l_start = (start); } while(0) - -static inline loff_t cfs_flock_end(cfs_flock_t *fl) -{ - return (fl->l_len == 0 ? CFS_OFFSET_MAX: (fl->l_start + fl->l_len)); -} - -static inline void cfs_flock_set_end(cfs_flock_t *fl, loff_t end) -{ - if (end == CFS_OFFSET_MAX) - fl->l_len = 0; - else - fl->l_len = end - fl->l_start; -} - -#define ATTR_MODE 0x0001 -#define ATTR_UID 0x0002 -#define ATTR_GID 0x0004 -#define ATTR_SIZE 0x0008 -#define ATTR_ATIME 0x0010 -#define ATTR_MTIME 0x0020 -#define ATTR_CTIME 0x0040 -#define ATTR_ATIME_SET 0x0080 -#define ATTR_MTIME_SET 0x0100 -#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 0x0400 -#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -#define ATTR_CTIME_SET 0x2000 -#define ATTR_BLOCKS 0x4000 -#define ATTR_KILL_SUID 0 -#define ATTR_KILL_SGID 0 - -#define in_group_p(x) (0) - -struct posix_acl_entry { - short e_tag; - unsigned short e_perm; - unsigned int e_id; -}; - -struct posix_acl { - atomic_t a_refcount; - unsigned int a_count; - struct posix_acl_entry a_entries[0]; -}; - -struct posix_acl *posix_acl_alloc(int count, int flags); -static inline struct posix_acl *posix_acl_from_xattr(const void *value, - size_t size) -{ - return posix_acl_alloc(0, 0); -} -static inline void posix_acl_release(struct posix_acl *acl) {}; -static inline int posix_acl_valid(const struct posix_acl *acl) { return 0; } -static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) -{ - return 
acl; -} - -#else /* !__KERNEL__ */ - -typedef struct file cfs_file_t; - -#endif /* END __KERNEL__ */ - -typedef struct { - void *d; -} cfs_dentry_t; - -#ifndef O_SYNC -#define O_SYNC 0 -#endif -#ifndef O_DIRECTORY -#define O_DIRECTORY 0 -#endif -#ifndef O_LARGEFILE -#define O_LARGEFILE 0 -#endif - -#endif diff --git a/lnet/include/libcfs/darwin/darwin-lock.h b/lnet/include/libcfs/darwin/darwin-lock.h deleted file mode 100644 index f826fef26395d059a7ab7cbfb7eb68302077bae4..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-lock.h +++ /dev/null @@ -1,284 +0,0 @@ -#ifndef __LIBCFS_DARWIN_CFS_LOCK_H__ -#define __LIBCFS_DARWIN_CFS_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <mach/sync_policy.h> -#include <mach/task.h> -#include <mach/semaphore.h> -#include <kern/assert.h> -#include <kern/thread.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-sync.h> - -/* - * spin_lock (use Linux kernel's primitives) - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - */ -struct spin_lock { - struct kspin spin; -}; - -typedef struct spin_lock spinlock_t; - -static inline void spin_lock_init(spinlock_t *lock) -{ - kspin_init(&lock->spin); -} - -static inline void spin_lock(spinlock_t *lock) -{ - kspin_lock(&lock->spin); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - kspin_unlock(&lock->spin); -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return kspin_trylock(&lock->spin); -} - -static inline void spin_lock_done(spinlock_t *lock) -{ - kspin_done(&lock->spin); -} - -#error "does this lock out timer callbacks?" 
-#define spin_lock_bh(x) spin_lock(x) -#define spin_unlock_bh(x) spin_unlock(x) -#define spin_lock_bh_init(x) spin_lock_init(x) - -extern boolean_t ml_set_interrupts_enabled(boolean_t enable); -#define __disable_irq() ml_set_interrupts_enabled(FALSE) -#define __enable_irq(x) (void) ml_set_interrupts_enabled(x) - -#define spin_lock_irqsave(s, f) do{ \ - f = __disable_irq(); \ - spin_lock(s); }while(0) - -#define spin_unlock_irqrestore(s, f) do{ \ - spin_unlock(s); \ - __enable_irq(f);}while(0) - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ -struct semaphore { - struct ksem sem; -}; - -static inline void sema_init(struct semaphore *s, int val) -{ - ksem_init(&s->sem, val); -} - -static inline void __down(struct semaphore *s) -{ - ksem_down(&s->sem, 1); -} - -static inline void __up(struct semaphore *s) -{ - ksem_up(&s->sem, 1); -} - -/* - * Mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ - -#define mutex_up(s) __up(s) -#define mutex_down(s) __down(s) - -#define init_mutex(x) sema_init(x, 1) -#define init_mutex_locked(x) sema_init(x, 0) - -/* - * Completion: - * - * - init_completion(c) - * - complete(c) - * - wait_for_completion(c) - */ -struct completion { - /* - * Emulate completion by semaphore for now. - * - * XXX nikita: this is not safe if completion is used to synchronize - * exit from kernel daemon thread and kext unloading. In this case - * some core function (a la complete_and_exit()) is needed. 
- */ - struct ksem sem; -}; - -static inline void init_completion(struct completion *c) -{ - ksem_init(&c->sem, 0); -} - -static inline void complete(struct completion *c) -{ - ksem_up(&c->sem, 1); -} - -static inline void wait_for_completion(struct completion *c) -{ - ksem_down(&c->sem, 1); -} - -/* - * rw_semaphore: - * - * - DECLARE_RWSEM(x) - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ -struct rw_semaphore { - struct krw_sem s; -}; - -static inline void init_rwsem(struct rw_semaphore *s) -{ - krw_sem_init(&s->s); -} - -static inline void fini_rwsem(struct rw_semaphore *s) -{ - krw_sem_done(&s->s); -} - -static inline void down_read(struct rw_semaphore *s) -{ - krw_sem_down_r(&s->s); -} - -static inline int down_read_trylock(struct rw_semaphore *s) -{ - int ret = krw_sem_down_r_try(&s->s); - return ret == 0; -} - -static inline void down_write(struct rw_semaphore *s) -{ - krw_sem_down_w(&s->s); -} - -static inline int down_write_trylock(struct rw_semaphore *s) -{ - int ret = krw_sem_down_w_try(&s->s); - return ret == 0; -} - -static inline void up_read(struct rw_semaphore *s) -{ - krw_sem_up_r(&s->s); -} - -static inline void up_write(struct rw_semaphore *s) -{ - krw_sem_up_w(&s->s); -} - -/* - * read-write lock : Need to be investigated more!! 
- * - * - DECLARE_RWLOCK(l) - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ -typedef struct krw_spin rwlock_t; - -#define rwlock_init(pl) krw_spin_init(pl) - -#define read_lock(l) krw_spin_down_r(l) -#define read_unlock(l) krw_spin_up_r(l) -#define write_lock(l) krw_spin_down_w(l) -#define write_unlock(l) krw_spin_up_w(l) - -#define write_lock_irqsave(l, f) do{ \ - f = __disable_irq(); \ - write_lock(l); }while(0) - -#define write_unlock_irqrestore(l, f) do{ \ - write_unlock(l); \ - __enable_irq(f);}while(0) - -#define read_lock_irqsave(l, f) do{ \ - f = __disable_irq(); \ - read_lock(l); }while(0) - -#define read_unlock_irqrestore(l, f) do{ \ - read_unlock(l); \ - __enable_irq(f);}while(0) -/* - * Funnel: - * - * Safe funnel in/out - */ -#ifdef __DARWIN8__ - -#define CFS_DECL_FUNNEL_DATA -#define CFS_DECL_CONE_DATA DECLARE_FUNNEL_DATA -#define CFS_DECL_NET_DATA DECLARE_FUNNEL_DATA -#define CFS_CONE_IN do {} while(0) -#define CFS_CONE_EX do {} while(0) - -#define CFS_NET_IN do {} while(0) -#define CFS_NET_EX do {} while(0) - -#else - -#define CFS_DECL_FUNNEL_DATA \ - boolean_t __funnel_state = FALSE; \ - funnel_t *__funnel -#define CFS_DECL_CONE_DATA CFS_DECL_FUNNEL_DATA -#define CFS_DECL_NET_DATA CFS_DECL_FUNNEL_DATA - -void lustre_cone_in(boolean_t *state, funnel_t **cone); -void lustre_cone_ex(boolean_t state, funnel_t *cone); - -#define CFS_CONE_IN lustre_cone_in(&__funnel_state, &__funnel) -#define CFS_CONE_EX lustre_cone_ex(__funnel_state, __funnel) - -void lustre_net_in(boolean_t *state, funnel_t **cone); -void lustre_net_ex(boolean_t state, funnel_t *cone); - -#define CFS_NET_IN lustre_net_in(&__funnel_state, &__funnel) -#define CFS_NET_EX lustre_net_ex(__funnel_state, __funnel) - -#endif - -#else -#include <libcfs/user-lock.h> -#endif /* __KERNEL__ */ - -/* __XNU_CFS_LOCK_H */ -#endif diff --git a/lnet/include/libcfs/darwin/darwin-mem.h b/lnet/include/libcfs/darwin/darwin-mem.h deleted file mode 
100644 index 5ffcd4e549682c52c448ca993fc7ee6472baeec1..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-mem.h +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef __LIBCFS_DARWIN_CFS_MEM_H__ -#define __LIBCFS_DARWIN_CFS_MEM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - -#include <sys/types.h> -#include <sys/systm.h> - -#include <sys/vm.h> -#include <sys/kernel.h> -#include <sys/ubc.h> -#include <sys/uio.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/lockf.h> - -#include <mach/mach_types.h> -#include <mach/vm_types.h> -#include <vm/pmap.h> -#include <vm/vm_kern.h> -#include <mach/machine/vm_param.h> -#include <kern/thread_call.h> -#include <sys/param.h> -#include <sys/vm.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-sync.h> -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/list.h> - -/* - * Basic xnu_page struct, should be binary compatibility with - * all page types in xnu (we have only xnu_raw_page, xll_page now) - */ - -/* Variable sized pages are not supported */ - -#ifdef PAGE_SHIFT -#define CFS_PAGE_SHIFT PAGE_SHIFT -#else -#define CFS_PAGE_SHIFT 12 -#endif - -#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT) - -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE - 1)) - -enum { - XNU_PAGE_RAW, - XNU_PAGE_XLL, - XNU_PAGE_NTYPES -}; - -typedef __u32 page_off_t; - -/* - * For XNU we have our own page cache built on top of underlying BSD/MACH - * infrastructure. In particular, we have two disjoint types of pages: - * - * - "raw" pages (XNU_PAGE_RAW): these are just buffers mapped into KVM, - * based on UPLs, and - * - * - "xll" pages (XNU_PAGE_XLL): these are used by file system to cache - * file data, owned by file system objects, hashed, lrued, etc. 
- * - * cfs_page_t has to cover both of them, because core Lustre code is based on - * the Linux assumption that page is _both_ memory buffer and file system - * caching entity. - * - * To achieve this, all types of pages supported on XNU has to start from - * common header that contains only "page type". Common cfs_page_t operations - * dispatch through operation vector based on page type. - * - */ -typedef struct xnu_page { - int type; -} cfs_page_t; - -struct xnu_page_ops { - void *(*page_map) (cfs_page_t *); - void (*page_unmap) (cfs_page_t *); - void *(*page_address) (cfs_page_t *); -}; - -void xnu_page_ops_register(int type, struct xnu_page_ops *ops); -void xnu_page_ops_unregister(int type); - -/* - * raw page, no cache object, just like buffer - */ -struct xnu_raw_page { - struct xnu_page header; - void *virtual; - atomic_t count; - struct list_head link; -}; - -/* - * Public interface to lustre - * - * - cfs_alloc_page(f) - * - cfs_free_page(p) - * - cfs_kmap(p) - * - cfs_kunmap(p) - * - cfs_page_address(p) - */ - -/* - * Of all functions above only cfs_kmap(), cfs_kunmap(), and - * cfs_page_address() can be called on file system pages. The rest is for raw - * pages only. - */ - -cfs_page_t *cfs_alloc_page(u_int32_t flags); -void cfs_free_page(cfs_page_t *page); -void cfs_get_page(cfs_page_t *page); -int cfs_put_page_testzero(cfs_page_t *page); -int cfs_page_count(cfs_page_t *page); -#define cfs_page_index(pg) (0) - -void *cfs_page_address(cfs_page_t *pg); -void *cfs_kmap(cfs_page_t *pg); -void cfs_kunmap(cfs_page_t *pg); - -/* - * Memory allocator - */ - -void *cfs_alloc(size_t nr_bytes, u_int32_t flags); -void cfs_free(void *addr); - -void *cfs_alloc_large(size_t nr_bytes); -void cfs_free_large(void *addr); - -extern int get_preemption_level(void); - -#define CFS_ALLOC_ATOMIC_TRY \ - (get_preemption_level() != 0 ? 
CFS_ALLOC_ATOMIC : 0) - -/* - * Slab: - * - * No slab in OSX, use zone allocator to simulate slab - */ -#define SLAB_HWCACHE_ALIGN 0 - -#ifdef __DARWIN8__ -/* - * In Darwin8, we cannot use zalloc_noblock(not exported by kernel), - * also, direct using of zone allocator is not recommended. - */ -#define CFS_INDIVIDUAL_ZONE (0) - -#if !CFS_INDIVIDUAL_ZONE -#include <libkern/OSMalloc.h> -typedef OSMallocTag mem_cache_t; -#else -typedef void* zone_t; -typedef zone_t mem_cache_t; -#endif - -#else /* !__DARWIN8__ */ - -#define CFS_INDIVIDUAL_ZONE (1) - -typedef zone_t mem_cache_t; - -#endif /* !__DARWIN8__ */ - -#define MC_NAME_MAX_LEN 64 - -typedef struct cfs_mem_cache { - int mc_size; - mem_cache_t mc_cache; - struct list_head mc_link; - char mc_name [MC_NAME_MAX_LEN]; -} cfs_mem_cache_t; - -#define KMEM_CACHE_MAX_COUNT 64 -#define KMEM_MAX_ZONE 8192 - -cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long); -int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); -void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); -void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); - -/* - * Misc - */ -/* XXX Liang: num_physpages... 
fix me */ -#define num_physpages (64 * 1024) - -#define CFS_DECL_MMSPACE -#define CFS_MMSPACE_OPEN do {} while(0) -#define CFS_MMSPACE_CLOSE do {} while(0) - -#define copy_from_user(kaddr, uaddr, size) copyin(CAST_USER_ADDR_T(uaddr), (caddr_t)kaddr, size) -#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, CAST_USER_ADDR_T(uaddr), size) - -#if 0 -static inline int strncpy_from_user(char *kaddr, char *uaddr, int size) -{ - size_t count; - return copyinstr((const user_addr_t)uaddr, (void *)kaddr, size, &count); -} -#endif - -#if defined (__ppc__) -#define mb() __asm__ __volatile__ ("sync" : : : "memory") -#define rmb() __asm__ __volatile__ ("sync" : : : "memory") -#define wmb() __asm__ __volatile__ ("eieio" : : : "memory") -#elif defined (__i386__) -#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#define rmb() mb() -#define wmb() __asm__ __volatile__ ("": : :"memory") -#else -#error architecture not supported -#endif - -#else /* !__KERNEL__ */ - -#define CFS_CACHE_SHIFT 12 -#define PAGE_CACHE_SIZE (1 << CFS_CACHE_SHIFT) -#include <libcfs/user-prim.h> - -#endif /* __KERNEL__ */ - -#endif /* __XNU_CFS_MEM_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-prim.h b/lnet/include/libcfs/darwin/darwin-prim.h deleted file mode 100644 index 0c201c27906b049f452fa153e86e0bb7a54d56c2..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-prim.h +++ /dev/null @@ -1,527 +0,0 @@ -#ifndef __LIBCFS_DARWIN_CFS_PRIM_H__ -#define __LIBCFS_DARWIN_CFS_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <sys/types.h> -#include <sys/systm.h> - -#ifndef __DARWIN8__ -# ifndef __APPLE_API_PRIVATE -# define __APPLE_API_PRIVATE -# include <sys/user.h> -# undef __APPLE_API_PRIVATE -# else -# include <sys/user.h> -# endif -# include <mach/mach_traps.h> -# include <mach/thread_switch.h> -# include <machine/cpu_number.h> -#endif /* !__DARWIN8__ */ - -#include <sys/kernel.h> - -#include <mach/thread_act.h> -#include <mach/mach_types.h> -#include <mach/time_value.h> -#include <kern/sched_prim.h> -#include <vm/pmap.h> -#include <vm/vm_kern.h> -#include <mach/machine/vm_param.h> -#include <machine/machine_routines.h> -#include <kern/clock.h> -#include <kern/thread_call.h> -#include <sys/param.h> -#include <sys/vm.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-utils.h> -#include <libcfs/darwin/darwin-lock.h> - -/* - * Symbol functions for libcfs - * - * OSX has no facility for use to register symbol. - * So we have to implement it. 
- */ -#define CFS_SYMBOL_LEN 64 - -struct cfs_symbol { - char name[CFS_SYMBOL_LEN]; - void *value; - int ref; - struct list_head sym_list; -}; - -extern kern_return_t cfs_symbol_register(const char *, const void *); -extern kern_return_t cfs_symbol_unregister(const char *); -extern void * cfs_symbol_get(const char *); -extern kern_return_t cfs_symbol_put(const char *); - -/* - * sysctl typedef - * - * User can register/unregister a list of sysctl_oids - * sysctl_oid is data struct of osx's sysctl-entry - */ -#define CONFIG_SYSCTL 1 - -typedef struct sysctl_oid * cfs_sysctl_table_t; -typedef cfs_sysctl_table_t cfs_sysctl_table_header_t; -cfs_sysctl_table_header_t *cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg); -void cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table); - -/* - * Proc file system APIs, no /proc fs support in OSX - */ -typedef struct cfs_proc_dir_entry { - void *data; -} cfs_proc_dir_entry_t; - -cfs_proc_dir_entry_t * cfs_create_proc_entry(char *name, int mod, - cfs_proc_dir_entry_t *parent); -void cfs_free_proc_entry(cfs_proc_dir_entry_t *de); -void cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry); - -typedef int (cfs_read_proc_t)(char *page, char **start, off_t off, - int count, int *eof, void *data); -typedef int (cfs_write_proc_t)(struct file *file, const char *buffer, - unsigned long count, void *data); - -/* - * cfs pseudo device - * - * cfs_psdev_t - * cfs_psdev_register: - * cfs_psdev_deregister: - */ -typedef struct { - int index; - void *handle; - const char *name; - struct cdevsw *devsw; - void *private; -} cfs_psdev_t; - -extern kern_return_t cfs_psdev_register(cfs_psdev_t *); -extern kern_return_t cfs_psdev_deregister(cfs_psdev_t *); - -/* - * Task struct and ... 
- * - * Using BSD current_proc in Darwin - */ -extern boolean_t assert_wait_possible(void); -extern void *get_bsdtask_info(task_t); - -#ifdef __DARWIN8__ - -typedef struct {} cfs_task_t; -#define cfs_current() ((cfs_task_t *)current_thread()) -#else /* !__DARWIN8__ */ - -typedef struct uthread cfs_task_t; - -#define current_uthread() ((struct uthread *)get_bsdthread_info(current_act())) -#define cfs_current() current_uthread() - -#endif /* !__DARWIN8__ */ - -#define cfs_task_lock(t) do {;} while (0) -#define cfs_task_unlock(t) do {;} while (0) - -#define set_current_state(s) do {;} while (0) - -#define CFS_DECL_JOURNAL_DATA -#define CFS_PUSH_JOURNAL do {;} while(0) -#define CFS_POP_JOURNAL do {;} while(0) - -#define THREAD_NAME(comm, fmt, a...) -/* - * Kernel thread: - * - * OSX kernel thread can not be created with args, - * so we have to implement new APIs to create thread with args - */ - -typedef int (*cfs_thread_t)(void *); - -extern task_t kernel_task; - -/* - * cloning flags, no use in OSX, just copy them from Linux - */ -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ -#define CLONE_PID 0x00001000 /* set if pid shared */ -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#define CLONE_THREAD 0x00010000 /* Same thread group? */ -#define CLONE_NEWNS 0x00020000 /* New namespace group? 
*/ - -#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) - -extern int cfs_kernel_thread(cfs_thread_t func, void *arg, int flag); - - -/* - * Wait Queue implementation - * - * Like wait_queue in Linux - */ -typedef struct cfs_waitq { - struct ksleep_chan wq_ksleep_chan; -} cfs_waitq_t; - -typedef struct cfs_waitlink { - struct cfs_waitq *wl_waitq; - struct ksleep_link wl_ksleep_link; -} cfs_waitlink_t; - -typedef int cfs_task_state_t; - -#define CFS_TASK_INTERRUPTIBLE THREAD_ABORTSAFE -#define CFS_TASK_UNINT THREAD_UNINT - -void cfs_waitq_init(struct cfs_waitq *waitq); -void cfs_waitlink_init(struct cfs_waitlink *link); - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link); -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); -int cfs_waitq_active(struct cfs_waitq *waitq); - -void cfs_waitq_signal(struct cfs_waitq *waitq); -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); -void cfs_waitq_broadcast(struct cfs_waitq *waitq); - -void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state); -cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, - cfs_task_state_t state, - cfs_duration_t timeout); - -/* - * Thread schedule APIs. 
- */ -#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL>>12)) -extern void thread_set_timer_deadline(__u64 deadline); -extern void thread_cancel_timer(void); - -static inline int cfs_schedule_timeout(int state, int64_t timeout) -{ - int result; - -#ifdef __DARWIN8__ - result = assert_wait((event_t)current_thread(), state); -#else - result = assert_wait((event_t)current_uthread(), state); -#endif - if (timeout > 0) { - __u64 expire; - nanoseconds_to_absolutetime(timeout, &expire); - clock_absolutetime_interval_to_deadline(expire, &expire); - thread_set_timer_deadline(expire); - } - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); - if (timeout > 0) - thread_cancel_timer(); - if (result == THREAD_TIMED_OUT) - result = 0; - else - result = 1; - return result; -} - -#define cfs_schedule() cfs_schedule_timeout(CFS_TASK_UNINT, CFS_TICK) -#define cfs_pause(tick) cfs_schedule_timeout(CFS_TASK_UNINT, tick) - -#define __wait_event(wq, condition) \ -do { \ - struct cfs_waitlink __wait; \ - \ - cfs_waitlink_init(&__wait); \ - for (;;) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) \ - break; \ - cfs_waitq_wait(&__wait, CFS_TASK_UNINT); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while (0) - -#define wait_event(wq, condition) \ -do { \ - if (condition) \ - break; \ - __wait_event(wq, condition); \ -} while (0) - -#define __wait_event_interruptible(wq, condition, ex, ret) \ -do { \ - struct cfs_waitlink __wait; \ - \ - cfs_waitlink_init(&__wait); \ - for (;;) { \ - if (ex == 0) \ - cfs_waitq_add(&wq, &__wait); \ - else \ - cfs_waitq_add_exclusive(&wq, &__wait); \ - if (condition) \ - break; \ - if (!cfs_signal_pending()) { \ - cfs_waitq_wait(&__wait, \ - CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while (0) - -#define wait_event_interruptible(wq, condition) \ -({ \ - int __ret = 0; \ - if (!condition) \ - 
__wait_event_interruptible(wq, condition, \ - 0, __ret); \ - __ret; \ -}) - -#define wait_event_interruptible_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - if (!condition) \ - __wait_event_interruptible(wq, condition, \ - 1, __ret); \ - __ret; \ -}) - -#ifndef __DARWIN8__ -extern void wakeup_one __P((void * chan)); -#endif -/* only used in tests */ -#define wake_up_process(p) \ - do { \ - wakeup_one((caddr_t)p); \ - } while (0) - -/* used in couple of places */ -static inline void sleep_on(cfs_waitq_t *waitq) -{ - cfs_waitlink_t link; - - cfs_waitlink_init(&link); - cfs_waitq_add(waitq, &link); - cfs_waitq_wait(&link, CFS_TASK_UNINT); - cfs_waitq_del(waitq, &link); -} - -/* - * Signal - */ -typedef sigset_t cfs_sigset_t; - -#define SIGNAL_MASK_ASSERT() -/* - * Timer - */ -typedef struct cfs_timer { - struct ktimer t; -} cfs_timer_t; - -#define cfs_init_timer(t) do {} while(0) -void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg); -void cfs_timer_done(struct cfs_timer *t); -void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline); -void cfs_timer_disarm(struct cfs_timer *t); -int cfs_timer_is_armed(struct cfs_timer *t); - -cfs_time_t cfs_timer_deadline(struct cfs_timer *t); - -/* - * Ioctl - * We don't need to copy out everything in osx - */ -#define cfs_ioctl_data_out(a, d, l) \ - ({ \ - int __size; \ - int __rc = 0; \ - assert((l) >= sizeof(*d)); \ - __size = (l) - sizeof(*d); \ - if (__size > 0) \ - __rc = copy_to_user((void *)a + __size, \ - (void *)d + __size, \ - __size); \ - __rc; \ - }) - -/* - * CPU - */ -/* Run in PowerG5 who is PPC64 */ -#define SMP_CACHE_BYTES 128 -#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) -#define NR_CPUS 2 - -/* - * XXX Liang: patch xnu and export current_processor()? 
- * - * #define smp_processor_id() current_processor() - */ -#define smp_processor_id() 0 -/* XXX smp_call_function is not supported in xnu */ -#define smp_call_function(f, a, n, w) do {} while(0) -int cfs_online_cpus(void); -#define smp_num_cpus cfs_online_cpus() - -/* - * Misc - */ -extern int is_suser(void); - -#ifndef likely -#define likely(exp) (exp) -#endif -#ifndef unlikely -#define unlikely(exp) (exp) -#endif - -#define lock_kernel() do {} while(0) -#define unlock_kernel() do {} while(0) - -#define CAP_SYS_BOOT 0 -#define CAP_SYS_ADMIN 1 -#define capable(a) ((a) == CAP_SYS_BOOT ? is_suser(): is_suser1()) - -#define USERMODEHELPER(path, argv, envp) (0) - -#define cfs_module(name, version, init, fini) \ -extern kern_return_t _start(kmod_info_t *ki, void *data); \ -extern kern_return_t _stop(kmod_info_t *ki, void *data); \ -__private_extern__ kern_return_t name##_start(kmod_info_t *ki, void *data); \ -__private_extern__ kern_return_t name##_stop(kmod_info_t *ki, void *data); \ - \ -kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \ - { "com.clusterfs.lustre." #name }, { version }, \ - -1, 0, 0, 0, 0, name##_start, name##_stop }; \ - \ -__private_extern__ kmod_start_func_t *_realmain = name##_start; \ -__private_extern__ kmod_stop_func_t *_antimain = name##_stop; \ -__private_extern__ int _kext_apple_cc = __APPLE_CC__ ; \ - \ -kern_return_t name##_start(kmod_info_t *ki, void *d) \ -{ \ - return init(); \ -} \ - \ -kern_return_t name##_stop(kmod_info_t *ki, void *d) \ -{ \ - fini(); \ - return KERN_SUCCESS; \ -} \ - \ -/* \ - * to allow semicolon after cfs_module(...) 
\ - */ \ -struct __dummy_ ## name ## _struct {} - -#define inter_module_get(n) cfs_symbol_get(n) -#define inter_module_put(n) cfs_symbol_put(n) - -static inline int request_module(char *name) -{ - return (-EINVAL); -} - -#ifndef __exit -#define __exit -#endif -#ifndef __init -#define __init -#endif - -#define EXPORT_SYMBOL(s) -#define MODULE_AUTHOR(s) -#define MODULE_DESCRIPTION(s) -#define MODULE_LICENSE(s) -#define MODULE_PARM(a, b) -#define MODULE_PARM_DESC(a, b) - -#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) -#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0) - -#define NR_IRQS 512 -#define in_interrupt() ml_at_interrupt_context() - -#define KERN_EMERG "<0>" /* system is unusable */ -#define KERN_ALERT "<1>" /* action must be taken immediately */ -#define KERN_CRIT "<2>" /* critical conditions */ -#define KERN_ERR "<3>" /* error conditions */ -#define KERN_WARNING "<4>" /* warning conditions */ -#define KERN_NOTICE "<5>" /* normal but significant condition */ -#define KERN_INFO "<6>" /* informational */ -#define KERN_DEBUG "<7>" /* debug-level messages */ - -static inline long PTR_ERR(const void *ptr) -{ - return (long) ptr; -} - -#define ERR_PTR(err) ((void *)err) -#define IS_ERR(p) ((unsigned long)(p) + 1000 < 1000) - -#else /* !__KERNEL__ */ - -typedef struct cfs_proc_dir_entry { - void *data; -} cfs_proc_dir_entry_t; - -#include <libcfs/user-prim.h> -#define __WORDSIZE 32 - -#endif /* END __KERNEL__ */ -/* - * Error number - */ -#ifndef EPROTO -#define EPROTO EPROTOTYPE -#endif -#ifndef EBADR -#define EBADR EBADRPC -#endif -#ifndef ERESTARTSYS -#define ERESTARTSYS 512 -#endif -#ifndef EDEADLOCK -#define EDEADLOCK EDEADLK -#endif -#ifndef ECOMM -#define ECOMM EINVAL -#endif -#ifndef ENODATA -#define ENODATA EINVAL -#endif -#ifndef ENOTSUPP -#define ENOTSUPP EINVAL -#endif - -#if BYTE_ORDER == BIG_ENDIAN -# define __BIG_ENDIAN -#else -# define __LITTLE_ENDIAN -#endif - -#endif /* __LIBCFS_DARWIN_CFS_PRIM_H__ */ diff --git 
a/lnet/include/libcfs/darwin/darwin-sync.h b/lnet/include/libcfs/darwin/darwin-sync.h deleted file mode 100644 index 7817b389d131180a3507f31066a66cd73a422c77..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-sync.h +++ /dev/null @@ -1,332 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Implementation of standard libcfs synchronization primitives for XNU - * kernel. - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -/* - * xnu_sync.h - * - * Created by nikita on Sun Jul 18 2004. - * - * Prototypes of XNU synchronization primitives. - */ - -#ifndef __LIBCFS_DARWIN_XNU_SYNC_H__ -#define __LIBCFS_DARWIN_XNU_SYNC_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#define XNU_SYNC_DEBUG (1) - -#if XNU_SYNC_DEBUG -#define ON_SYNC_DEBUG(e) e -#else -#define ON_SYNC_DEBUG(e) -#endif - -enum { - /* "egrep -i '^(o?x)?[abcdeflo]*$' /usr/dict/words" is your friend */ - KMUT_MAGIC = 0x0bac0cab, /* [a, [b, c]] = b (a, c) - c (a, b) */ - KSEM_MAGIC = 0x1abe11ed, - KCOND_MAGIC = 0xb01dface, - KRW_MAGIC = 0xdabb1edd, - KSPIN_MAGIC = 0xca11ab1e, - KRW_SPIN_MAGIC = 0xbabeface, - KSLEEP_CHAN_MAGIC = 0x0debac1e, - KSLEEP_LINK_MAGIC = 0xacc01ade, - KTIMER_MAGIC = 0xbefadd1e -}; - -/* ------------------------- spin lock ------------------------- */ - -/* - * XXX nikita: don't use NCPUS it's hardcoded to (1) in cpus.h - */ -#define SMP (1) - -#include <libcfs/list.h> - -#ifdef __DARWIN8__ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <kern/locks.h> - -/* - * hw_lock is not available in Darwin8 (hw_lock_* are not exported at all), - * so use lck_spin_t. we can hack out lck_spin_t easily, it's the only - * hacking in Darwin8.x. We did so because it'll take a lot of time to - * add lock_done for all locks, maybe it should be done in the future. - * If lock_done for all locks were added, we can: - * - * typedef lck_spin_t *xnu_spin_t; - */ -#if defined (__ppc__) -typedef struct { - unsigned int opaque[3]; -} xnu_spin_t; -#elif defined (__i386__) -typedef struct { - unsigned int opaque[10]; -} xnu_spin_t; -#endif - -/* - * wait_queue is not available in Darwin8 (wait_queue_* are not exported), - * use assert_wait/wakeup/wake_one (wait_queue in kernel hash). 
- */ -typedef void * xnu_wait_queue_t; - -/* DARWIN8 */ -#else - -#include <mach/mach_types.h> -#include <sys/types.h> -#include <kern/simple_lock.h> - -typedef hw_lock_data_t xnu_spin_t; -typedef struct wait_queue xnu_wait_queue_t; - -/* DARWIN8 */ -#endif - -struct kspin { -#if SMP - xnu_spin_t lock; -#endif -#if XNU_SYNC_DEBUG - unsigned magic; - thread_t owner; -#endif -}; - -void kspin_init(struct kspin *spin); -void kspin_done(struct kspin *spin); -void kspin_lock(struct kspin *spin); -void kspin_unlock(struct kspin *spin); -int kspin_trylock(struct kspin *spin); - -#if XNU_SYNC_DEBUG -/* - * two functions below are for use in assertions - */ -/* true, iff spin-lock is locked by the current thread */ -int kspin_islocked(struct kspin *spin); -/* true, iff spin-lock is not locked by the current thread */ -int kspin_isnotlocked(struct kspin *spin); -#else -#define kspin_islocked(s) (1) -#define kspin_isnotlocked(s) (1) -#endif - -/* ------------------------- rw spinlock ----------------------- */ -struct krw_spin { - struct kspin guard; - int count; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void krw_spin_init(struct krw_spin *sem); -void krw_spin_done(struct krw_spin *sem); -void krw_spin_down_r(struct krw_spin *sem); -void krw_spin_down_w(struct krw_spin *sem); -void krw_spin_up_r(struct krw_spin *sem); -void krw_spin_up_w(struct krw_spin *sem); - -/* ------------------------- semaphore ------------------------- */ - -struct ksem { - struct kspin guard; - xnu_wait_queue_t q; - int value; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void ksem_init(struct ksem *sem, int value); -void ksem_done(struct ksem *sem); -int ksem_up (struct ksem *sem, int value); -void ksem_down(struct ksem *sem, int value); -int ksem_trydown(struct ksem *sem, int value); - -/* ------------------------- mutex ------------------------- */ - -struct kmut { - struct ksem s; -#if XNU_SYNC_DEBUG - unsigned magic; - thread_t owner; -#endif -}; - -void kmut_init(struct kmut 
*mut); -void kmut_done(struct kmut *mut); - -void kmut_lock (struct kmut *mut); -void kmut_unlock (struct kmut *mut); -int kmut_trylock(struct kmut *mut); - -#if XNU_SYNC_DEBUG -/* - * two functions below are for use in assertions - */ -/* true, iff mutex is locked by the current thread */ -int kmut_islocked(struct kmut *mut); -/* true, iff mutex is not locked by the current thread */ -int kmut_isnotlocked(struct kmut *mut); -#else -#define kmut_islocked(m) (1) -#define kmut_isnotlocked(m) (1) -#endif - -/* ------------------------- condition variable ------------------------- */ - -struct kcond_link { - struct kcond_link *next; - struct ksem sem; -}; - -struct kcond { - struct kspin guard; - struct kcond_link *waiters; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void kcond_init(struct kcond *cond); -void kcond_done(struct kcond *cond); -void kcond_wait(struct kcond *cond, struct kspin *lock); -void kcond_signal(struct kcond *cond); -void kcond_broadcast(struct kcond *cond); - -void kcond_wait_guard(struct kcond *cond); -void kcond_signal_guard(struct kcond *cond); -void kcond_broadcast_guard(struct kcond *cond); - -/* ------------------------- read-write semaphore ------------------------- */ - -struct krw_sem { - int count; - struct kcond cond; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void krw_sem_init(struct krw_sem *sem); -void krw_sem_done(struct krw_sem *sem); -void krw_sem_down_r(struct krw_sem *sem); -int krw_sem_down_r_try(struct krw_sem *sem); -void krw_sem_down_w(struct krw_sem *sem); -int krw_sem_down_w_try(struct krw_sem *sem); -void krw_sem_up_r(struct krw_sem *sem); -void krw_sem_up_w(struct krw_sem *sem); - -/* ------------------------- sleep-channel ------------------------- */ - -struct ksleep_chan { - struct kspin guard; - struct list_head waiters; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -#define KSLEEP_CHAN_INITIALIZER {{{0}}} - -struct ksleep_link { - int flags; - event_t event; - int hits; - struct ksleep_chan 
*forward; - struct list_head linkage; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -enum { - KSLEEP_EXCLUSIVE = 1 -}; - -void ksleep_chan_init(struct ksleep_chan *chan); -void ksleep_chan_done(struct ksleep_chan *chan); - -void ksleep_link_init(struct ksleep_link *link); -void ksleep_link_done(struct ksleep_link *link); - -void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link); -void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link); - -void ksleep_wait(struct ksleep_chan *chan, int state); -int64_t ksleep_timedwait(struct ksleep_chan *chan, int state, __u64 timeout); - -void ksleep_wake(struct ksleep_chan *chan); -void ksleep_wake_all(struct ksleep_chan *chan); -void ksleep_wake_nr(struct ksleep_chan *chan, int nr); - -#define KSLEEP_LINK_DECLARE(name) \ -{ \ - .flags = 0, \ - .event = 0, \ - .hits = 0, \ - .linkage = CFS_LIST_HEAD(name.linkage), \ - .magic = KSLEEP_LINK_MAGIC \ -} - -/* ------------------------- timer ------------------------- */ - -struct ktimer { - struct kspin guard; - void (*func)(void *); - void *arg; - u_int64_t deadline; /* timer deadline in absolute nanoseconds */ - int armed; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg); -void ktimer_done(struct ktimer *t); -void ktimer_arm(struct ktimer *t, u_int64_t deadline); -void ktimer_disarm(struct ktimer *t); -int ktimer_is_armed(struct ktimer *t); - -u_int64_t ktimer_deadline(struct ktimer *t); - -/* __XNU_SYNC_H__ */ -#endif - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/darwin/darwin-tcpip.h b/lnet/include/libcfs/darwin/darwin-tcpip.h deleted file mode 100644 index 1a73891cf94ade08489040dc45a9e4b70bbcd4fd..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-tcpip.h +++ /dev/null @@ -1,90 +0,0 @@ -/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_DARWIN_TCPIP_H__ -#define __LIBCFS_DARWIN_TCPIP_H__ - -#ifdef __KERNEL__ -#include <sys/socket.h> - -#ifdef __DARWIN8__ - -struct socket; - -typedef void (*so_upcall)(socket_t sock, void* arg, int waitf); - -#define CFS_SOCK_UPCALL 0x1 -#define CFS_SOCK_DOWN 0x2 - -#define CFS_SOCK_MAGIC 0xbabeface - -typedef struct cfs_socket { - socket_t s_so; - int s_magic; - int s_flags; - so_upcall s_upcall; - void *s_upcallarg; -} cfs_socket_t; - - -/* cfs_socket_t to bsd socket */ -#define C2B_SOCK(s) ((s)->s_so) - -static inline int get_sock_intopt(socket_t so, int opt) -{ - int val, len; - int rc; - - /* - * sock_getsockopt will take a lock(mutex) for socket, - * so it can be blocked. So be careful while using - * them. 
- */ - len = sizeof(val); - rc = sock_getsockopt(so, SOL_SOCKET, opt, &val, &len); - assert(rc == 0); - return val; -} - -#define SOCK_ERROR(s) get_sock_intopt(C2B_SOCK(s), SO_ERROR) -/* #define SOCK_WMEM_QUEUED(s) (0) */ -#define SOCK_WMEM_QUEUED(s) get_sock_intopt(C2B_SOCK(s), SO_NWRITE) -/* XXX Liang: no reliable way to get it in Darwin8.x */ -#define SOCK_TEST_NOSPACE(s) (0) - -void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg); -void libcfs_sock_reset_cb(cfs_socket_t *sock); - -#else /* !__DARWIN8__ */ - -#define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc) -#define SOCK_ERROR(so) ((so)->so_error) - -#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat) - -#endif /* !__DARWIN8__ */ - -#endif /* __KERNEL END */ - -#endif /* __XNU_CFS_TYPES_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-time.h b/lnet/include/libcfs/darwin/darwin-time.h deleted file mode 100644 index 35862a63d60f75855c08ac4392d61c8cb2800e81..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-time.h +++ /dev/null @@ -1,249 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. 
- * - * Implementation of portable time API for XNU kernel - * - */ - -#ifndef __LIBCFS_DARWIN_DARWIN_TIME_H__ -#define __LIBCFS_DARWIN_DARWIN_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). - * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION 1000000 - -#ifdef __KERNEL__ -#include <sys/types.h> -#include <sys/systm.h> - -#include <sys/kernel.h> - -#include <mach/mach_types.h> -#include <mach/time_value.h> -#include <kern/clock.h> -#include <sys/param.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-utils.h> -#include 
<libcfs/darwin/darwin-lock.h> - -/* - * There are three way to measure time in OS X: - * 1. nanoseconds - * 2. absolute time (abstime unit equal to the length of one bus cycle), - * schedule of thread/timer are counted by absolute time, but abstime - * in different mac can be different also, so we wouldn't use it. - * 3. clock interval (1sec = 100hz). But clock interval only taken by KPI - * like tsleep(). - * - * We use nanoseconds (uptime, not calendar time) - * - * clock_get_uptime() :get absolute time since bootup. - * nanouptime() :get nanoseconds since bootup - * microuptime() :get microseonds since bootup - * nanotime() :get nanoseconds since epoch - * microtime() :get microseconds since epoch - */ -typedef u_int64_t cfs_time_t; /* nanoseconds */ -typedef int64_t cfs_duration_t; - -#define CFS_TIME_T "%llu" -#define CFS_DURATION_T "%lld" - -typedef struct timeval cfs_fs_time_t; - -static inline cfs_time_t cfs_time_current(void) -{ - struct timespec instant; - - nanouptime(&instant); - return ((u_int64_t)instant.tv_sec) * NSEC_PER_SEC + instant.tv_nsec; -} - -static inline time_t cfs_time_current_sec(void) -{ - struct timespec instant; - - nanouptime(&instant); - return instant.tv_sec; -} - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return t + d; -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return t1 - t2; -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return (int64_t)t1 - (int64_t)t2 < 0; -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return (int64_t)t1 - (int64_t)t2 <= 0; -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - microtime((struct timeval *)t); -} - -static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return t->tv_sec; -} - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - *v = *t; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - 
s->tv_sec = t->tv_sec; - s->tv_nsec = t->tv_usec * NSEC_PER_USEC; -} - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return (NSEC_PER_SEC * (int64_t)seconds); -} - -/* - * internal helper function used by cfs_fs_time_before*() - */ -static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return ((int64_t)t->tv_sec)*NSEC_PER_SEC + t->tv_usec*NSEC_PER_USEC; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) < 0; -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) <= 0; -} - -static inline time_t cfs_duration_sec(cfs_duration_t d) -{ - return d / NSEC_PER_SEC; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ - s->tv_sec = d / NSEC_PER_SEC; - s->tv_usec = (d - ((int64_t)s->tv_sec) * NSEC_PER_SEC) / NSEC_PER_USEC; -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ - s->tv_sec = d / NSEC_PER_SEC; - s->tv_nsec = d - ((int64_t)s->tv_sec) * NSEC_PER_SEC; -} - -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift -#define cfs_time_before_64 cfs_time_before -#define cfs_time_beforeq_64 cfs_time_beforeq - -/* - * One jiffy (in nanoseconds) - * - * osfmk/kern/sched_prim.c - * #define DEFAULT_PREEMPTION_RATE 100 - */ -#define CFS_TICK (NSEC_PER_SEC / (u_int64_t)100) - -#define LTIME_S(t) (t) - -/* __KERNEL__ */ -#else - -/* - * User level - */ -#include <libcfs/user-time.h> - -/* __KERNEL__ */ -#endif - -/* __LIBCFS_DARWIN_DARWIN_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/darwin/darwin-types.h b/lnet/include/libcfs/darwin/darwin-types.h deleted file mode 100644 index 
3c247246b2c83d2c482e77a5006a848c9268b8e3..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-types.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_DARWIN_XNU_TYPES_H__ -#define __LIBCFS_DARWIN_XNU_TYPES_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#include <mach/mach_types.h> -#include <sys/types.h> - -#ifndef _BLKID_TYPES_H -#define _BLKID_TYPES_H -#endif - -typedef u_int8_t __u8; -typedef u_int16_t __u16; -typedef u_int32_t __u32; -typedef u_int64_t __u64; -typedef int8_t __s8; -typedef int16_t __s16; -typedef int32_t __s32; -typedef int64_t __s64; - -#ifdef __KERNEL__ - -#include <kern/kern_types.h> - - -typedef struct { int e; } event_chan_t; -typedef dev_t kdev_t; - -/* - * Atmoic define - */ -#include <libkern/OSAtomic.h> - -typedef struct { volatile uint32_t counter; } atomic_t; - -#define ATOMIC_INIT(i) { (i) } -#define atomic_read(a) ((a)->counter) -#define atomic_set(a, v) (((a)->counter) = (v)) -#ifdef __DARWIN8__ -/* OS*Atomic return the value before the operation */ -#define atomic_add(v, a) OSAddAtomic(v, (SInt32 *)&((a)->counter)) -#define atomic_sub(v, a) OSAddAtomic(-(v), (SInt32 *)&((a)->counter)) -#define atomic_inc(a) OSIncrementAtomic((SInt32 *)&((a)->counter)) -#define atomic_dec(a) OSDecrementAtomic((SInt32 *)&((a)->counter)) -#else /* !__DARWIN8__ */ -#define atomic_add(v, a) hw_atomic_add((__u32 *)&((a)->counter), v) -#define atomic_sub(v, a) hw_atomic_sub((__u32 *)&((a)->counter), v) -#define atomic_inc(a) atomic_add(1, a) -#define atomic_dec(a) atomic_sub(1, a) -#endif /* !__DARWIN8__ */ -#define atomic_sub_and_test(v, a) (atomic_sub(v, a) == (v)) -#define atomic_dec_and_test(a) (atomic_dec(a) == 1) -#define atomic_inc_return(a) (atomic_inc(a) + 1) -#define atomic_dec_return(a) (atomic_dec(a) - 1) - -#include <libsa/mach/mach.h> -typedef off_t loff_t; - -#else /* !__KERNEL__ */ - -#include <stdint.h> - -typedef off_t loff_t; - -#endif /* __KERNEL END */ -typedef unsigned short umode_t; - -#endif /* __XNU_CFS_TYPES_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-utils.h b/lnet/include/libcfs/darwin/darwin-utils.h deleted file mode 100644 index 0f808a26bfa76c3694379214c84c2a0057c6c2d0..0000000000000000000000000000000000000000 --- 
a/lnet/include/libcfs/darwin/darwin-utils.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef __LIBCFS_DARWIN_UTILS_H__ -#define __LIBCFS_DARWIN_UTILS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#include <sys/random.h> - -#ifdef __KERNEL__ -inline int isspace(char c); -char *strpbrk(const char *cs, const char *ct); -char * strsep(char **s, const char *ct); -size_t strnlen(const char * s, size_t count); -char * strstr(const char *in, const char *str); -char * strrchr(const char *p, int ch); -char * ul2dstr(unsigned long address, char *buf, int len); - -#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) -#define simple_strtoul(a1, a2, a3) strtoul(a1, a2, a3) -#define simple_strtoll(a1, a2, a3) strtoq(a1, a2, a3) -#define simple_strtoull(a1, a2, a3) strtouq(a1, a2, a3) - -#define test_bit(i, a) isset(a, i) -#define set_bit(i, a) setbit(a, i) -#define clear_bit(i, a) clrbit(a, i) - -#define get_random_bytes(buf, len) read_random(buf, len) - -#endif /* __KERNEL__ */ - -#ifndef min_t -#define min_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) -#endif -#ifndef max_t -#define max_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x > __y ? 
__x: __y; }) -#endif - -#define do_div(n,base) \ - ({ \ - __u64 __n = (n); \ - __u32 __base = (base); \ - __u32 __mod; \ - \ - __mod = __n % __base; \ - n = __n / __base; \ - __mod; \ - }) - -#define NIPQUAD(addr) \ - ((unsigned char *)&addr)[0], \ - ((unsigned char *)&addr)[1], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char *)&addr)[3] - -#define HIPQUAD NIPQUAD - -#ifndef LIST_CIRCLE -#define LIST_CIRCLE(elm, field) \ - do { \ - (elm)->field.le_prev = &(elm)->field.le_next; \ - } while (0) -#endif - -#endif /* __XNU_UTILS_H__ */ diff --git a/lnet/include/libcfs/darwin/kp30.h b/lnet/include/libcfs/darwin/kp30.h deleted file mode 100644 index f9e94b18cc431385d0ceac1de4fd066ea95e8774..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/kp30.h +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_DARWIN_KP30__ -#define __LIBCFS_DARWIN_KP30__ - -#ifndef __LIBCFS_KP30_H__ -#error Do not #include this file directly. 
#include <libcfs/kp30.h> instead -#endif - -#ifdef __KERNEL__ - -#include <sys/types.h> -#include <sys/malloc.h> -#include <sys/systm.h> -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/conf.h> -#include <miscfs/devfs/devfs.h> -#include <stdarg.h> - -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/darwin/darwin-prim.h> -#include <lnet/lnet.h> - -#define our_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1) - -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ -#else -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif -#define LASSERT_SEM_LOCKED(sem) do {} while(0) /* XXX */ - -#define LIBCFS_PANIC(msg) panic(msg) -#error libcfs_register_panic_notifier() missing -#error libcfs_unregister_panic_notifier() missing - -/* --------------------------------------------------------------------- */ - -#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) ((typeof(&x))cfs_symbol_get(#x)) -#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x) - -#define PORTAL_MODULE_USE do{int i = 0; i++;}while(0) -#define PORTAL_MODULE_UNUSE do{int i = 0; i--;}while(0) - -#define num_online_cpus() cfs_online_cpus() - -/******************************************************************************/ -/* XXX Liang: There is no module parameter supporting in OSX */ -#define CFS_MODULE_PARM(name, t, type, perm, desc) - -#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ -/******************************************************************************/ - -#else /* !__KERNEL__ */ -# include <stdio.h> -# include <stdlib.h> -# include <stdint.h> -# include <unistd.h> -# include <time.h> -# include <machine/limits.h> -# include <sys/types.h> -#endif - -#define BITS_PER_LONG LONG_BIT -/******************************************************************************/ -/* Light-weight trace - * 
Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 0 - -typedef struct { - long long lwte_when; - char *lwte_where; - void *lwte_task; - long lwte_p1; - long lwte_p2; - long lwte_p3; - long lwte_p4; -} lwt_event_t; - -# define LWT_EVENT(p1,p2,p3,p4) /* no lwt implementation yet */ - -/* -------------------------------------------------------------------------- */ - -#define IOCTL_LIBCFS_TYPE struct libcfs_ioctl_data - -#define LPU64 "%llu" -#define LPD64 "%lld" -#define LPX64 "%#llx" -#define LPSZ "%lu" -#define LPSSZ "%ld" -# define LI_POISON ((int)0x5a5a5a5a) -# define LL_POISON ((long)0x5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a) - -#endif diff --git a/lnet/include/libcfs/darwin/libcfs.h b/lnet/include/libcfs/darwin/libcfs.h deleted file mode 100644 index eb4d8f35982bb076c246eca81fb6ad0c5cd5de8f..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/libcfs.h +++ /dev/null @@ -1,193 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_DARWIN_LIBCFS_H__ -#define __LIBCFS_DARWIN_LIBCFS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#include <mach/mach_types.h> -#include <sys/errno.h> -#include <string.h> -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-time.h> -#include <libcfs/darwin/darwin-prim.h> -#include <libcfs/darwin/darwin-mem.h> -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/darwin/darwin-fs.h> -#include <libcfs/darwin/darwin-tcpip.h> - -#ifdef __KERNEL__ -# include <sys/types.h> -# include <sys/time.h> -# define do_gettimeofday(tv) microuptime(tv) -#else -# include <sys/time.h> -# define do_gettimeofday(tv) gettimeofday(tv, NULL); -typedef unsigned long long cycles_t; -#endif - -#define __cpu_to_le64(x) OSSwapHostToLittleInt64(x) -#define __cpu_to_le32(x) OSSwapHostToLittleInt32(x) -#define __cpu_to_le16(x) OSSwapHostToLittleInt16(x) - -#define __le16_to_cpu(x) OSSwapLittleToHostInt16(x) -#define __le32_to_cpu(x) OSSwapLittleToHostInt32(x) -#define __le64_to_cpu(x) OSSwapLittleToHostInt64(x) - -#define cpu_to_le64(x) __cpu_to_le64(x) -#define cpu_to_le32(x) __cpu_to_le32(x) -#define cpu_to_le16(x) __cpu_to_le16(x) - -#define le64_to_cpu(x) __le64_to_cpu(x) -#define le32_to_cpu(x) __le32_to_cpu(x) -#define le16_to_cpu(x) __le16_to_cpu(x) - -#define __swab16(x) OSSwapInt16(x) -#define __swab32(x) OSSwapInt32(x) -#define __swab64(x) OSSwapInt64(x) -#define __swab16s(x) do { *(x) = __swab16(*(x)); } while (0) -#define __swab32s(x) do { *(x) = __swab32(*(x)); } while (0) -#define __swab64s(x) do { *(x) = __swab64(*(x)); } while (0) - -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - - -#ifdef __KERNEL__ -# include <sys/systm.h> -# include <pexpert/pexpert.h> -/* Fix me */ -# define THREAD_SIZE 8192 -#else -# define THREAD_SIZE 8192 -#endif -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#define 
CHECK_STACK() do { } while(0) -#define CDEBUG_STACK() (0L) - -/* Darwin has defined RETURN, so we have to undef it in lustre */ -#ifdef RETURN -#undef RETURN -#endif - -/* - * When this is enabled debugging messages are indented according to the - * current "nesting level". Nesting level in increased when ENTRY macro - * is executed, and decreased on EXIT and RETURN. - */ -#ifdef __KERNEL__ -#define ENTRY_NESTING_SUPPORT (0) -#endif - -#if ENTRY_NESTING_SUPPORT - -/* - * Currently ENTRY_NESTING_SUPPORT is only supported for XNU port. Basic - * idea is to keep per-thread pointer to small data structure (struct - * cfs_debug_data) describing current nesting level. In XNU unused - * proc->p_wmegs field in hijacked for this. On Linux - * current->journal_info can be used. In user space - * pthread_{g,s}etspecific(). - * - * ENTRY macro allocates new cfs_debug_data on stack, and installs it as - * a current nesting level, storing old data in cfs_debug_data it just - * created. - * - * EXIT pops old value back. - * - */ - -/* - * One problem with this approach is that there is a lot of code that - * does ENTRY and then escapes scope without doing EXIT/RETURN. In this - * case per-thread current nesting level pointer is dangling (it points - * to the stack area that is possible already overridden). To detect - * such cases, we add two magic fields to the cfs_debug_data and check - * them whenever current nesting level pointer is dereferenced. While - * looking flaky this works because stack is always consumed - * "continously". 
- */ -enum { - CDD_MAGIC1 = 0x02128506, - CDD_MAGIC2 = 0x42424242 -}; - -struct cfs_debug_data { - unsigned int magic1; - struct cfs_debug_data *parent; - int nesting_level; - unsigned int magic2; -}; - -void __entry_nesting(struct cfs_debug_data *child); -void __exit_nesting(struct cfs_debug_data *child); -unsigned int __current_nesting_level(void); - -#define ENTRY_NESTING \ -struct cfs_debug_data __cdd = { .magic1 = CDD_MAGIC1, \ - .parent = NULL, \ - .nesting_level = 0, \ - .magic2 = CDD_MAGIC2 }; \ -__entry_nesting(&__cdd); - -#define EXIT_NESTING __exit_nesting(&__cdd) - -/* ENTRY_NESTING_SUPPORT */ -#else - -#define ENTRY_NESTING do {;} while (0) -#define EXIT_NESTING do {;} while (0) -#define __current_nesting_level() (0) - -/* ENTRY_NESTING_SUPPORT */ -#endif - -#define LUSTRE_LNET_PID 12345 - -#define _XNU_LIBCFS_H - -/* - * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) - * - * Implementation is in darwin-curproc.c - */ -#define CFS_CURPROC_COMM_MAX MAXCOMLEN -/* - * XNU has no capabilities - */ -typedef int cfs_kernel_cap_t; - -#ifdef __KERNEL__ -enum { - /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */ - CFS_STACK_TRACE_DEPTH = 16 -}; - -struct cfs_stack_trace { - void *frame[CFS_STACK_TRACE_DEPTH]; -}; - -#define printk(format, args...) printf(format, ## args) - -#ifdef WITH_WATCHDOG -#undef WITH_WATCHDOG -#endif - -#endif /* __KERNEL__ */ - -#endif /* _XNU_LIBCFS_H */ diff --git a/lnet/include/libcfs/darwin/lltrace.h b/lnet/include/libcfs/darwin/lltrace.h deleted file mode 100644 index 31d6e17f2812a3a4435b0688d451cffca5e65c01..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/lltrace.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_DARWIN_LLTRACE_H__ -#define __LIBCFS_DARWIN_LLTRACE_H__ - -#ifndef __LIBCFS_LLTRACE_H__ -#error Do not #include this file directly. 
#include <libcfs/lltrace.h> instead -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/time.h> -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <mach/vm_param.h> -#include <lnet/lnetctl.h> - -#endif diff --git a/lnet/include/libcfs/darwin/portals_utils.h b/lnet/include/libcfs/darwin/portals_utils.h deleted file mode 100644 index 4907cb15e2576159cb975c39edbc031bf625728d..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/portals_utils.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __LIBCFS_DARWIN_PORTALS_UTILS_H__ -#define __LIBCFS_DARWIN_PORTALS_UTILS_H__ - -#ifndef __LIBCFS_PORTALS_UTILS_H__ -#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead -#endif - -#include <libcfs/list.h> -#ifdef __KERNEL__ -#include <mach/mach_types.h> -#include <libcfs/libcfs.h> -#else /* !__KERNEL__ */ -#include <machine/endian.h> -#include <netinet/in.h> -#include <sys/syscall.h> -#endif /* !__KERNEL__ */ - -#endif diff --git a/lnet/include/libcfs/kp30.h b/lnet/include/libcfs/kp30.h deleted file mode 100644 index dcd599b7be57d3d3a3072cd991f55b9784d52d0f..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/kp30.h +++ /dev/null @@ -1,610 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_KP30_H__ -#define __LIBCFS_KP30_H__ - -/* Controlled via configure key */ -/* #define LIBCFS_DEBUG */ - -#include <libcfs/libcfs.h> -#include <lnet/types.h> - -#if defined(__linux__) -#include <libcfs/linux/kp30.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/kp30.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/kp30.h> -#else -#error Unsupported operating system -#endif - -#ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -#endif - -#ifdef __KERNEL__ - -#ifdef 
LIBCFS_DEBUG - -/* - * When this is on, LASSERT macro includes check for assignment used instead - * of equality check, but doesn't have unlikely(). Turn this on from time to - * time to make test-builds. This shouldn't be on for production release. - */ -#define LASSERT_CHECKED (0) - -#if LASSERT_CHECKED -/* - * Assertion. - * - * Strange construction with empty "then" clause is used to trigger compiler - * warnings on the assertions of the form LASSERT(a = b); - * - * "warning: suggest parentheses around assignment used as truth value" - * - * requires -Wall. Unfortunately this rules out use of likely/unlikely. - */ -#define LASSERT(cond) \ -({ \ - if (cond) \ - ; \ - else \ - libcfs_assertion_failed( #cond , __FILE__, \ - __FUNCTION__, __LINE__); \ -}) - -#define LASSERTF(cond, fmt, a...) \ -({ \ - if (cond) \ - ; \ - else { \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ - __FILE__, __FUNCTION__,__LINE__, \ - "ASSERTION(" #cond ") failed:" fmt, \ - ## a); \ - LBUG(); \ - } \ -}) - -/* LASSERT_CHECKED */ -#else - -#define LASSERT(cond) \ -({ \ - if (unlikely(!(cond))) \ - libcfs_assertion_failed(#cond , __FILE__, \ - __FUNCTION__, __LINE__); \ -}) - -#define LASSERTF(cond, fmt, a...) \ -({ \ - if (unlikely(!(cond))) { \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ - __FILE__, __FUNCTION__,__LINE__, \ - "ASSERTION(" #cond ") failed:" fmt, \ - ## a); \ - LBUG(); \ - } \ -}) - -/* LASSERT_CHECKED */ -#endif - -/* LIBCFS_DEBUG */ -#else -#define LASSERT(e) ((void)(0)) -#define LASSERTF(cond, fmt...) 
((void)(0)) -#endif /* LIBCFS_DEBUG */ - -#define KLASSERT(e) LASSERT(e) - -void lbug_with_loc(char *file, const char *func, const int line) - __attribute__((noreturn)); - -#define LBUG() lbug_with_loc(__FILE__, __FUNCTION__, __LINE__) - -extern atomic_t libcfs_kmemory; -/* - * Memory - */ -#ifdef LIBCFS_DEBUG - -# define libcfs_kmem_inc(ptr, size) \ -do { \ - atomic_add(size, &libcfs_kmemory); \ -} while (0) - -# define libcfs_kmem_dec(ptr, size) do { \ - atomic_sub(size, &libcfs_kmemory); \ -} while (0) - -#else -# define libcfs_kmem_inc(ptr, size) do {} while (0) -# define libcfs_kmem_dec(ptr, size) do {} while (0) -#endif /* LIBCFS_DEBUG */ - -#define LIBCFS_VMALLOC_SIZE 16384 - -#define LIBCFS_ALLOC_GFP(ptr, size, mask) \ -do { \ - LASSERT(!in_interrupt() || \ - (size <= LIBCFS_VMALLOC_SIZE && mask == CFS_ALLOC_ATOMIC));\ - if (unlikely((size) > LIBCFS_VMALLOC_SIZE)) \ - (ptr) = cfs_alloc_large(size); \ - else \ - (ptr) = cfs_alloc((size), (mask)); \ - if (unlikely((ptr) == NULL)) { \ - CERROR("LNET: out of memory at %s:%d (tried to alloc '" \ - #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - CERROR("LNET: %d total bytes allocated by lnet\n", \ - atomic_read(&libcfs_kmemory)); \ - } else { \ - libcfs_kmem_inc((ptr), (size)); \ - if (!((mask) & CFS_ALLOC_ZERO)) \ - memset((ptr), 0, (size)); \ - } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), (ptr), atomic_read (&libcfs_kmemory)); \ -} while (0) - -#define LIBCFS_ALLOC(ptr, size) \ - LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_IO) - -#define LIBCFS_ALLOC_ATOMIC(ptr, size) \ - LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_ATOMIC) - -#define LIBCFS_FREE(ptr, size) \ -do { \ - int s = (size); \ - if (unlikely((ptr) == NULL)) { \ - CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ - "%s:%d\n", s, __FILE__, __LINE__); \ - break; \ - } \ - libcfs_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - s, (ptr), atomic_read(&libcfs_kmemory)); \ - if 
(unlikely(s > LIBCFS_VMALLOC_SIZE)) \ - cfs_free_large(ptr); \ - else \ - cfs_free(ptr); \ -} while (0) - -/******************************************************************************/ - -/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */ -#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) -#define ___htonl(x) __cpu_to_be32(x) -#define ___htons(x) __cpu_to_be16(x) -#define ___ntohl(x) __be32_to_cpu(x) -#define ___ntohs(x) __be16_to_cpu(x) -#define htonl(x) ___htonl(x) -#define ntohl(x) ___ntohl(x) -#define htons(x) ___htons(x) -#define ntohs(x) ___ntohs(x) -#endif - -void libcfs_debug_dumpstack(cfs_task_t *tsk); -void libcfs_run_upcall(char **argv); -void libcfs_run_lbug_upcall(char * file, const char *fn, const int line); -void libcfs_debug_dumplog(void); -int libcfs_debug_init(unsigned long bufsize); -int libcfs_debug_cleanup(void); -int libcfs_debug_clear_buffer(void); -int libcfs_debug_mark_buffer(char *text); - -void libcfs_debug_set_level(unsigned int debug_level); - -#else /* !__KERNEL__ */ -# ifdef LIBCFS_DEBUG -# undef NDEBUG -# include <assert.h> -# define LASSERT(e) assert(e) -# define LASSERTF(cond, args...) \ -do { \ - if (!(cond)) \ - CERROR(args); \ - assert(cond); \ -} while (0) -# define LBUG() assert(0) -# else -# define LASSERT(e) ((void)(0)) -# define LASSERTF(cond, args...) do { } while (0) -# define LBUG() ((void)(0)) -# endif /* LIBCFS_DEBUG */ -# define KLASSERT(e) do { } while (0) -# define printk(format, args...) printf (format, ## args) -# ifdef CRAY_XT3 /* buggy calloc! 
*/ -# define LIBCFS_ALLOC(ptr, size) \ - do { \ - (ptr) = malloc(size); \ - memset(ptr, 0, size); \ - } while (0) -# else -# define LIBCFS_ALLOC(ptr, size) do { (ptr) = calloc(1,size); } while (0) -# endif -# define LIBCFS_FREE(a, b) do { free(a); } while (0) - -void libcfs_debug_dumplog(void); -int libcfs_debug_init(unsigned long bufsize); -int libcfs_debug_cleanup(void); - -/* - * Generic compiler-dependent macros required for kernel - * build go below this comment. Actual compiler/compiler version - * specific implementations come from the above header files - */ - -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - -/* !__KERNEL__ */ -#endif - -/* - * compile-time assertions. @cond has to be constant expression. - * ISO C Standard: - * - * 6.8.4.2 The switch statement - * - * .... - * - * [#3] The expression of each case label shall be an integer - * constant expression and no two of the case constant - * expressions in the same switch statement shall have the same - * value after conversion... 
- * - */ -#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } }) - -/* support decl needed both by kernel and liblustre */ -int libcfs_isknown_lnd(int type); -char *libcfs_lnd2modname(int type); -char *libcfs_lnd2str(int type); -int libcfs_str2lnd(const char *str); -char *libcfs_net2str(__u32 net); -char *libcfs_nid2str(lnet_nid_t nid); -__u32 libcfs_str2net(const char *str); -lnet_nid_t libcfs_str2nid(const char *str); -int libcfs_str2anynid(lnet_nid_t *nid, const char *str); -char *libcfs_id2str(lnet_process_id_t id); -void libcfs_setnet0alias(int type); - -/* how an LNET NID encodes net:address */ -#define LNET_NIDADDR(nid) ((__u32)((nid) & 0xffffffff)) -#define LNET_NIDNET(nid) ((__u32)(((nid) >> 32)) & 0xffffffff) -#define LNET_MKNID(net,addr) ((((__u64)(net))<<32)|((__u64)(addr))) -/* how net encodes type:number */ -#define LNET_NETNUM(net) ((net) & 0xffff) -#define LNET_NETTYP(net) (((net) >> 16) & 0xffff) -#define LNET_MKNET(typ,num) ((((__u32)(typ))<<16)|((__u32)(num))) - -/* implication */ -#define ergo(a, b) (!(a) || (b)) -/* logical equivalence */ -#define equi(a, b) (!!(a) == !!(b)) - -#ifndef CURRENT_TIME -# define CURRENT_TIME time(0) -#endif - -/* -------------------------------------------------------------------- - * Light-weight trace - * Support for temporary event tracing with minimal Heisenberg effect. - * All stuff about lwt are put in arch/kp30.h - * -------------------------------------------------------------------- */ - -struct libcfs_device_userstate -{ - int ldu_memhog_pages; - cfs_page_t *ldu_memhog_root_page; -}; - -/* what used to be in portals_lib.h */ -#ifndef MIN -# define MIN(a,b) (((a)<(b)) ? (a): (b)) -#endif -#ifndef MAX -# define MAX(a,b) (((a)>(b)) ? (a): (b)) -#endif - -#define MKSTR(ptr) ((ptr))? 
(ptr) : "" - -static inline int size_round4 (int val) -{ - return (val + 3) & (~0x3); -} - -static inline int size_round (int val) -{ - return (val + 7) & (~0x7); -} - -static inline int size_round16(int val) -{ - return (val + 0xf) & (~0xf); -} - -static inline int size_round32(int val) -{ - return (val + 0x1f) & (~0x1f); -} - -static inline int size_round0(int val) -{ - if (!val) - return 0; - return (val + 1 + 7) & (~0x7); -} - -static inline size_t round_strlen(char *fset) -{ - return (size_t)size_round((int)strlen(fset) + 1); -} - -#define LOGL(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)ptr, (const char *)var, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGU(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)var, (const char *)ptr, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGL0(var,len,ptr) \ -do { \ - if (!len) \ - break; \ - memcpy((char *)ptr, (const char *)var, len); \ - *((char *)(ptr) + len) = 0; \ - ptr += size_round(len + 1); \ -} while (0) - -/* - * USER LEVEL STUFF BELOW - */ - -#define LIBCFS_IOCTL_VERSION 0x0001000a - -struct libcfs_ioctl_data { - __u32 ioc_len; - __u32 ioc_version; - - __u64 ioc_nid; - __u64 ioc_u64[1]; - - __u32 ioc_flags; - __u32 ioc_count; - __u32 ioc_net; - __u32 ioc_u32[7]; - - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - - __u32 ioc_plen1; /* buffers in userspace */ - char *ioc_pbuf1; - __u32 ioc_plen2; /* buffers in userspace */ - char *ioc_pbuf2; - - char ioc_bulk[0]; -}; - - -struct libcfs_ioctl_hdr { - __u32 ioc_len; - __u32 ioc_version; -}; - -struct libcfs_debug_ioctl_data -{ - struct libcfs_ioctl_hdr hdr; - unsigned int subs; - unsigned int debug; -}; - -#define LIBCFS_IOC_INIT(data) \ -do { \ - memset(&data, 0, sizeof(data)); \ - data.ioc_version = LIBCFS_IOCTL_VERSION; \ - data.ioc_len = sizeof(data); \ -} while (0) - -/* FIXME check conflict with lustre_lib.h */ -#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) - -static inline 
int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) -{ - int len = sizeof(*data); - len += size_round(data->ioc_inllen1); - len += size_round(data->ioc_inllen2); - return len; -} - -static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) -{ - if (data->ioc_len > (1<<30)) { - CERROR ("LIBCFS ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - CERROR ("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - CERROR ("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR ("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR ("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR ("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR ("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR ("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); - return 1; - } - if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len ) { - CERROR ("LIBCFS ioctl: packlen != ioc_len\n"); - return 1; - } - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - CERROR ("LIBCFS ioctl: inlbuf1 not 0 terminated\n"); - return 1; - } - if (data->ioc_inllen2 && - data->ioc_bulk[size_round(data->ioc_inllen1) + - data->ioc_inllen2 - 1] != '\0') { - CERROR ("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); - return 1; - } - return 0; -} - -#ifndef __KERNEL__ -static inline int libcfs_ioctl_pack(struct libcfs_ioctl_data *data, char **pbuf, - int max) -{ - char *ptr; - struct libcfs_ioctl_data *overlay; - data->ioc_len = 
libcfs_ioctl_packlen(data); - data->ioc_version = LIBCFS_IOCTL_VERSION; - - if (*pbuf && libcfs_ioctl_packlen(data) > max) - return 1; - if (*pbuf == NULL) { - *pbuf = malloc(data->ioc_len); - } - if (!*pbuf) - return 1; - overlay = (struct libcfs_ioctl_data *)*pbuf; - memcpy(*pbuf, data, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (libcfs_ioctl_is_invalid(overlay)) - return 1; - - return 0; -} - -#else - -extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg); -extern int libcfs_ioctl_popdata(void *arg, void *buf, int size); - -#endif - -/* ioctls for manipulating snapshots 30- */ -#define IOC_LIBCFS_TYPE 'e' -#define IOC_LIBCFS_MIN_NR 30 -/* libcfs ioctls */ -#define IOC_LIBCFS_PANIC _IOWR('e', 30, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LWT_CONTROL _IOWR('e', 33, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LWT_SNAPSHOT _IOWR('e', 34, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LWT_LOOKUP_STRING _IOWR('e', 35, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, IOCTL_LIBCFS_TYPE) -/* lnet ioctls */ -#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_ADD_ROUTE _IOWR('e', 52, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEL_ROUTE _IOWR('e', 53, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_ROUTE _IOWR('e', 54, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CONFIGURE 
_IOWR('e', 59, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE) -/* lnd ioctls */ -#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_TXDESC _IOWR('e', 77, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_GMID _IOWR('e', 81, IOCTL_LIBCFS_TYPE) - -#define IOC_LIBCFS_MAX_NR 81 - - -enum { - /* Only add to these values (i.e. don't ever change or redefine them): - * network addresses depend on them... 
*/ - QSWLND = 1, - SOCKLND = 2, - GMLND = 3, - PTLLND = 4, - O2IBLND = 5, - CIBLND = 6, - OPENIBLND = 7, - IIBLND = 8, - LOLND = 9, - RALND = 10, - VIBLND = 11, - MXLND = 12, -}; - -enum { - DEBUG_DAEMON_START = 1, - DEBUG_DAEMON_STOP = 2, - DEBUG_DAEMON_PAUSE = 3, - DEBUG_DAEMON_CONTINUE = 4, -}; - - -enum cfg_record_type { - PORTALS_CFG_TYPE = 1, - LUSTRE_CFG_TYPE = 123, -}; - -typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); - -/* lustre_id output helper macros */ -#define DLID4 "%lu/%lu/%lu/%lu" - -#define OLID4(id) \ - (unsigned long)(id)->li_fid.lf_id, \ - (unsigned long)(id)->li_fid.lf_group, \ - (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \ - (unsigned long)(id)->li_stc.u.e3s.l3s_gen - -#endif diff --git a/lnet/include/libcfs/libcfs.h b/lnet/include/libcfs/libcfs.h deleted file mode 100644 index 80518b1ee5dd3671c41e8fce2facd94abd5c626d..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/libcfs.h +++ /dev/null @@ -1,649 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LIBCFS_H__ -#define __LIBCFS_LIBCFS_H__ - -#if !__GNUC__ -#define __attribute__(x) -#endif - -#if defined(__linux__) -#include <libcfs/linux/libcfs.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/libcfs.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/libcfs.h> -#else -#error Unsupported operating system. -#endif - -#include "curproc.h" - -#ifndef __KERNEL__ -#include <stdio.h> -#endif - -/* Controlled via configure key */ -/* #define LIBCFS_DEBUG */ - -#ifndef offsetof -# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) -#endif - -/* cardinality of array */ -#define sizeof_array(a) ((sizeof (a)) / (sizeof ((a)[0]))) - -#if !defined(container_of) -/* given a pointer @ptr to the field @member embedded into type (usually - * struct) @type, return pointer to the embedding instance of @type. 
*/ -#define container_of(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -#endif - -#define container_of0(ptr, type, member) \ -({ \ - typeof(ptr) __ptr = (ptr); \ - type *__res; \ - \ - if (unlikely(IS_ERR(__ptr) || __ptr == NULL)) \ - __res = (type *)__ptr; \ - else \ - __res = container_of(__ptr, type, member); \ - __res; \ -}) - -/* - * true iff @i is power-of-2 - */ -#define IS_PO2(i) \ -({ \ - typeof(i) __i; \ - \ - __i = (i); \ - !(__i & (__i - 1)); \ -}) - -#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) - -/* - * Debugging - */ -extern unsigned int libcfs_subsystem_debug; -extern unsigned int libcfs_stack; -extern unsigned int libcfs_debug; -extern unsigned int libcfs_printk; -extern unsigned int libcfs_console_ratelimit; -extern cfs_duration_t libcfs_console_max_delay; -extern cfs_duration_t libcfs_console_min_delay; -extern unsigned int libcfs_console_backoff; -extern unsigned int libcfs_debug_binary; -extern char debug_file_path[1024]; - -int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys); -int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys); - -/* Has there been an LBUG? 
*/ -extern unsigned int libcfs_catastrophe; -extern unsigned int libcfs_panic_on_lbug; - -/* - * struct ptldebug_header is defined in libcfs/<os>/libcfs.h - */ - -#define PH_FLAG_FIRST_RECORD 1 - -/* Debugging subsystems (32 bits, non-overlapping) */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ -#define S_UNDEFINED 0x00000001 -#define S_MDC 0x00000002 -#define S_MDS 0x00000004 -#define S_OSC 0x00000008 -#define S_OST 0x00000010 -#define S_CLASS 0x00000020 -#define S_LOG 0x00000040 -#define S_LLITE 0x00000080 -#define S_RPC 0x00000100 -#define S_MGMT 0x00000200 -#define S_LNET 0x00000400 -#define S_LND 0x00000800 /* ALL LNDs */ -#define S_PINGER 0x00001000 -#define S_FILTER 0x00002000 -/* unused */ -#define S_ECHO 0x00008000 -#define S_LDLM 0x00010000 -#define S_LOV 0x00020000 -/* unused */ -/* unused */ -/* unused */ -/* unused */ -/* unused */ -#define S_LMV 0x00800000 /* b_new_cmd */ -/* unused */ -#define S_SEC 0x02000000 /* upcall cache */ -#define S_GSS 0x04000000 /* b_new_cmd */ -/* unused */ -#define S_MGC 0x10000000 -#define S_MGS 0x20000000 -#define S_FID 0x40000000 /* b_new_cmd */ -#define S_FLD 0x80000000 /* b_new_cmd */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ - -/* Debugging masks (32 bits, non-overlapping) */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ -#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ -#define D_INODE 0x00000002 -#define D_SUPER 0x00000004 -#define D_EXT2 0x00000008 /* anything from ext2_debug */ -#define D_MALLOC 0x00000010 /* print malloc, free information */ -#define D_CACHE 0x00000020 /* cache-related items */ -#define D_INFO 0x00000040 /* general information */ -#define D_IOCTL 0x00000080 /* ioctl related information */ -#define D_NETERROR 0x00000100 /* network errors */ -#define D_NET 0x00000200 /* network communications */ -#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) 
*/ -#define D_BUFFS 0x00000800 -#define D_OTHER 0x00001000 -#define D_DENTRY 0x00002000 -#define D_NETTRACE 0x00004000 -#define D_PAGE 0x00008000 /* bulk page handling */ -#define D_DLMTRACE 0x00010000 -#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */ -#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */ -#define D_HA 0x00080000 /* recovery and failover */ -#define D_RPCTRACE 0x00100000 /* for distributed debugging */ -#define D_VFSTRACE 0x00200000 -#define D_READA 0x00400000 /* read-ahead */ -#define D_MMAP 0x00800000 -#define D_CONFIG 0x01000000 -#define D_CONSOLE 0x02000000 -#define D_QUOTA 0x04000000 -#define D_SEC 0x08000000 -/* keep these in sync with lnet/{utils,libcfs}/debug.c */ - -#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) - -#ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -#endif - -#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600)) /* jiffies */ -#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */ -#define CDEBUG_DEFAULT_BACKOFF 2 -typedef struct { - cfs_time_t cdls_next; - int cdls_count; - cfs_duration_t cdls_delay; -} cfs_debug_limit_state_t; - -/* Controlled via configure key */ -/* #define CDEBUG_ENABLED */ - -#if defined(__KERNEL__) || (defined(__arch_lib__) && !defined(LUSTRE_UTILS)) - -#ifdef CDEBUG_ENABLED -#define __CDEBUG(cdls, mask, format, a...) \ -do { \ - CHECK_STACK(); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, mask, \ - __FILE__, __FUNCTION__, __LINE__, \ - format, ## a); \ -} while (0) - -#define CDEBUG(mask, format, a...) __CDEBUG(NULL, mask, format, ## a) - -#define CDEBUG_LIMIT(mask, format, a...) \ -do { \ - static cfs_debug_limit_state_t cdls; \ - \ - __CDEBUG(&cdls, mask, format, ## a); \ -} while (0) - -#else /* CDEBUG_ENABLED */ -#define CDEBUG(mask, format, a...) 
(void)(0) -#define CDEBUG_LIMIT(mask, format, a...) (void)(0) -#warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!" -#endif - -#else - -#define CDEBUG(mask, format, a...) \ -do { \ - if (((mask) & D_CANTMASK) != 0) \ - fprintf(stderr, "(%s:%d:%s()) " format, \ - __FILE__, __LINE__, __FUNCTION__, ## a); \ -} while (0) - -#define CDEBUG_LIMIT CDEBUG - -#endif /* !__KERNEL__ */ - -/* - * Lustre Error Checksum: calculates checksum - * of Hex number by XORing each bit. - */ -#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \ - ((hexnum) >> 8 & 0xf)) - -#define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a) -#define CEMERG(format, a...) CDEBUG_LIMIT(D_EMERG, format, ## a) - -#define LCONSOLE(mask, format, a...) CDEBUG(D_CONSOLE | (mask), format, ## a) -#define LCONSOLE_INFO(format, a...) CDEBUG_LIMIT(D_CONSOLE, format, ## a) -#define LCONSOLE_WARN(format, a...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## a) -#define LCONSOLE_ERROR_MSG(errnum, format, a...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \ - "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## a) -#define LCONSOLE_ERROR(format, a...) LCONSOLE_ERROR_MSG(0x00, format, ## a) - -#define LCONSOLE_EMERG(format, a...) CDEBUG(D_CONSOLE | D_EMERG, format, ## a) - -#ifdef CDEBUG_ENABLED - -#define GOTO(label, rc) \ -do { \ - long GOTO__ret = (long)(rc); \ - CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ - #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ - (signed long)GOTO__ret); \ - goto label; \ -} while (0) -#else -#define GOTO(label, rc) do { ((void)(rc)); goto label; } while (0) -#endif - -/* Controlled via configure key */ -/* #define CDEBUG_ENTRY_EXIT */ - -#ifdef CDEBUG_ENTRY_EXIT - -/* - * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise - * there will be a warning in osx. 
- */ -#define RETURN(rc) \ -do { \ - typeof(rc) RETURN__ret = (rc); \ - CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ - (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ - EXIT_NESTING; \ - return RETURN__ret; \ -} while (0) - -#define ENTRY \ -ENTRY_NESTING; \ -do { \ - CDEBUG(D_TRACE, "Process entered\n"); \ -} while (0) - -#define EXIT \ -do { \ - CDEBUG(D_TRACE, "Process leaving\n"); \ - EXIT_NESTING; \ -} while(0) -#else /* !CDEBUG_ENTRY_EXIT */ - -#define RETURN(rc) return (rc) -#define ENTRY do { } while (0) -#define EXIT do { } while (0) - -#endif /* !CDEBUG_ENTRY_EXIT */ - -/* - * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses - * Lustre RETURN(NULL) macro. - */ -#if defined(NULL) -#undef NULL -#endif - -#define NULL ((void *)0) - -#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID - -#ifdef __KERNEL__ - -#include <libcfs/list.h> - -struct libcfs_ioctl_data; /* forward ref */ - -struct libcfs_ioctl_handler { - struct list_head item; - int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data); -}; - -#define DECLARE_IOCTL_HANDLER(ident, func) \ - struct libcfs_ioctl_handler ident = { \ - /* .item = */ CFS_LIST_HEAD_INIT(ident.item), \ - /* .handle_ioctl = */ func \ - } - -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); - -/* libcfs tcpip */ -int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask); -int libcfs_ipif_enumerate(char ***names); -void libcfs_ipif_free_enumeration(char **names, int n); -int libcfs_sock_listen(cfs_socket_t **sockp, __u32 ip, int port, int backlog); -int libcfs_sock_accept(cfs_socket_t **newsockp, cfs_socket_t *sock); -void libcfs_sock_abort_accept(cfs_socket_t *sock); -int libcfs_sock_connect(cfs_socket_t **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port); -int libcfs_sock_setbuf(cfs_socket_t *socket, int txbufsize, int rxbufsize); -int 
libcfs_sock_getbuf(cfs_socket_t *socket, int *txbufsize, int *rxbufsize); -int libcfs_sock_getaddr(cfs_socket_t *socket, int remote, __u32 *ip, int *port); -int libcfs_sock_write(cfs_socket_t *sock, void *buffer, int nob, int timeout); -int libcfs_sock_read(cfs_socket_t *sock, void *buffer, int nob, int timeout); -void libcfs_sock_release(cfs_socket_t *sock); - -/* libcfs watchdogs */ -struct lc_watchdog; - -/* Add a watchdog which fires after "time" milliseconds of delay. You have to - * touch it once to enable it. */ -struct lc_watchdog *lc_watchdog_add(int time, - void (*cb)(pid_t pid, void *), - void *data); - -/* Enables a watchdog and resets its timer. */ -void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms); -void lc_watchdog_touch(struct lc_watchdog *lcw); - -/* Disable a watchdog; touch it to restart it. */ -void lc_watchdog_disable(struct lc_watchdog *lcw); - -/* Clean up the watchdog */ -void lc_watchdog_delete(struct lc_watchdog *lcw); - -/* Dump a debug log */ -void lc_watchdog_dumplog(pid_t pid, void *data); - -/* __KERNEL__ */ -#endif - -/* need both kernel and user-land acceptor */ -#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512 -#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023 - -/* - * libcfs pseudo device operations - * - * struct cfs_psdev_t and - * cfs_psdev_register() and - * cfs_psdev_deregister() are declared in - * libcfs/<os>/cfs_prim.h - * - * It's just draft now. - */ - -struct cfs_psdev_file { - unsigned long off; - void *private_data; - unsigned long reserved1; - unsigned long reserved2; -}; - -struct cfs_psdev_ops { - int (*p_open)(unsigned long, void *); - int (*p_close)(unsigned long, void *); - int (*p_read)(struct cfs_psdev_file *, char *, unsigned long); - int (*p_write)(struct cfs_psdev_file *, char *, unsigned long); - int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *); -}; - -/* - * generic time manipulation functions. 
- */ - -static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2) -{ - return cfs_time_before(t2, t1); -} - -static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2) -{ - return cfs_time_beforeq(t2, t1); -} - -/* - * return seconds since UNIX epoch - */ -static inline time_t cfs_unix_seconds(void) -{ - cfs_fs_time_t t; - - cfs_fs_time_current(&t); - return (time_t)cfs_fs_time_sec(&t); -} - -static inline cfs_time_t cfs_time_shift(int seconds) -{ - return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds)); -} - -static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small, - struct timeval *result) -{ - long r = (long) ( - (large->tv_sec - small->tv_sec) * ONE_MILLION + - (large->tv_usec - small->tv_usec)); - if (result != NULL) { - result->tv_usec = r % ONE_MILLION; - result->tv_sec = r / ONE_MILLION; - } - return r; -} - -#define CFS_RATELIMIT(seconds) \ -({ \ - /* \ - * XXX nikita: non-portable initializer \ - */ \ - static time_t __next_message = 0; \ - int result; \ - \ - if (cfs_time_after(cfs_time_current(), __next_message)) \ - result = 1; \ - else { \ - __next_message = cfs_time_shift(seconds); \ - result = 0; \ - } \ - result; \ -}) - -struct libcfs_debug_msg_data { - cfs_debug_limit_state_t *msg_cdls; - int msg_subsys; - const char *msg_file; - const char *msg_fn; - int msg_line; -}; - -#define DEBUG_MSG_DATA_INIT(cdls, subsystem, file, func, ln ) { \ - .msg_cdls = (cdls), \ - .msg_subsys = (subsystem), \ - .msg_file = (file), \ - .msg_fn = (func), \ - .msg_line = (ln) \ - } - - -extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, - int subsys, int mask, - const char *file, const char *fn, const int line, - const char *format1, va_list args, - const char *format2, ...) 
- __attribute__ ((format (printf, 9, 10))); - -#define libcfs_debug_vmsg(cdls, subsys, mask, file, fn, line, format, args) \ - libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,format,args,NULL,NULL) - -#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, a...) \ - libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ##a) - -#define cdebug_va(cdls, mask, file, func, line, fmt, args) do { \ - CHECK_STACK(); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - libcfs_debug_vmsg(cdls, DEBUG_SUBSYSTEM, (mask), \ - (file), (func), (line), fmt, args); \ -} while(0); - -#define cdebug(cdls, mask, file, func, line, fmt, a...) do { \ - CHECK_STACK(); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, (mask), \ - (file), (func), (line), fmt, ## a); \ -} while(0); - -extern void libcfs_assertion_failed(const char *expr, const char *file, - const char *fn, const int line); - -static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg) -{ - if (cfs_time_after(cfs_time_current(), - cfs_time_add(now, cfs_time_seconds(15)))) - CERROR("slow %s "CFS_TIME_T" sec\n", msg, - cfs_duration_sec(cfs_time_sub(cfs_time_current(),now))); -} - -/* - * helper function similar to do_gettimeofday() of Linux kernel - */ -static inline void cfs_fs_timeval(struct timeval *tv) -{ - cfs_fs_time_t time; - - cfs_fs_time_current(&time); - cfs_fs_time_usec(&time, tv); -} - -/* - * return valid time-out based on user supplied one. Currently we only check - * that time-out is not shorted than allowed. 
- */ -static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout) -{ - if (timeout < CFS_TICK) - timeout = CFS_TICK; - return timeout; -} - -/* - * Universal memory allocator API - */ -enum cfs_alloc_flags { - /* allocation is not allowed to block */ - CFS_ALLOC_ATOMIC = 0x1, - /* allocation is allowed to block */ - CFS_ALLOC_WAIT = 0x2, - /* allocation should return zeroed memory */ - CFS_ALLOC_ZERO = 0x4, - /* allocation is allowed to call file-system code to free/clean - * memory */ - CFS_ALLOC_FS = 0x8, - /* allocation is allowed to do io to free/clean memory */ - CFS_ALLOC_IO = 0x10, - /* don't report allocation failure to the console */ - CFS_ALLOC_NOWARN = 0x20, - /* standard allocator flag combination */ - CFS_ALLOC_STD = CFS_ALLOC_FS | CFS_ALLOC_IO, - CFS_ALLOC_USER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO, -}; - -/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */ -enum cfs_alloc_page_flags { - /* allow to return page beyond KVM. It has to be mapped into KVM by - * cfs_page_map(); */ - CFS_ALLOC_HIGH = 0x40, - CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH, -}; - -/* - * Drop into debugger, if possible. Implementation is provided by platform. - */ - -void cfs_enter_debugger(void); - -/* - * Defined by platform - */ -void cfs_daemonize(char *str); -int cfs_daemonize_ctxt(char *str); -cfs_sigset_t cfs_get_blocked_sigs(void); -cfs_sigset_t cfs_block_allsigs(void); -cfs_sigset_t cfs_block_sigs(cfs_sigset_t bits); -void cfs_restore_sigs(cfs_sigset_t); -int cfs_signal_pending(void); -void cfs_clear_sigpending(void); -/* - * XXX Liang: - * these macros should be removed in the future, - * we keep them just for keeping libcfs compatible - * with other branches. 
- */ -#define libcfs_daemonize(s) cfs_daemonize(s) -#define cfs_sigmask_lock(f) do { f= 0; } while (0) -#define cfs_sigmask_unlock(f) do { f= 0; } while (0) - -int convert_server_error(__u64 ecode); -int convert_client_oflag(int cflag, int *result); - -/* - * Stack-tracing filling. - */ - -/* - * Platform-dependent data-type to hold stack frames. - */ -struct cfs_stack_trace; - -/* - * Fill @trace with current back-trace. - */ -void cfs_stack_trace_fill(struct cfs_stack_trace *trace); - -/* - * Return instruction pointer for frame @frame_no. NULL if @frame_no is - * invalid. - */ -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no); - -/* - * Universal open flags. - */ -#define CFS_O_ACCMODE 0003 -#define CFS_O_CREAT 0100 -#define CFS_O_EXCL 0200 -#define CFS_O_NOCTTY 0400 -#define CFS_O_TRUNC 01000 -#define CFS_O_APPEND 02000 -#define CFS_O_NONBLOCK 04000 -#define CFS_O_NDELAY CFS_O_NONBLOCK -#define CFS_O_SYNC 010000 -#define CFS_O_ASYNC 020000 -#define CFS_O_DIRECT 040000 -#define CFS_O_LARGEFILE 0100000 -#define CFS_O_DIRECTORY 0200000 -#define CFS_O_NOFOLLOW 0400000 -#define CFS_O_NOATIME 01000000 - -/* convert local open flags to universal open flags */ -int cfs_oflags2univ(int flags); -/* convert universal open flags to local open flags */ -int cfs_univ2oflags(int flags); - -#define _LIBCFS_H - -#endif /* _LIBCFS_H */ diff --git a/lnet/include/libcfs/linux/.cvsignore b/lnet/include/libcfs/linux/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/libcfs/linux/Makefile.am b/lnet/include/libcfs/linux/Makefile.am deleted file mode 100644 index 072a7ad3c5a5112e495dfd4159fe7503c2b97596..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -EXTRA_DIST := kp30.h libcfs.h linux-fs.h 
linux-lock.h linux-mem.h \ - linux-prim.h linux-time.h linux-tcpip.h lltrace.h \ - portals_compat25.h portals_utils.h diff --git a/lnet/include/libcfs/linux/kp30.h b/lnet/include/libcfs/linux/kp30.h deleted file mode 100644 index 19355ed6c0bcae75b2b19ec12a3d60845e2a4eb1..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/kp30.h +++ /dev/null @@ -1,379 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_KP30_H__ -#define __LIBCFS_LINUX_KP30_H__ - -#ifndef __LIBCFS_KP30_H__ -#error Do not #include this file directly. #include <libcfs/kp30.h> instead -#endif - -#ifdef __KERNEL__ -#ifndef AUTOCONF_INCLUDED -# include <linux/config.h> -#endif -# include <linux/kernel.h> -# include <linux/mm.h> -# include <linux/string.h> -# include <linux/stat.h> -# include <linux/init.h> -# include <linux/errno.h> -# include <linux/unistd.h> -# include <asm/system.h> -# include <linux/kmod.h> -# include <linux/notifier.h> -# include <linux/fs.h> -# include <linux/miscdevice.h> -# include <linux/vmalloc.h> -# include <linux/time.h> -# include <linux/slab.h> -# include <linux/interrupt.h> -# include <linux/highmem.h> -# include <linux/module.h> -# include <linux/version.h> -# include <lnet/lnet.h> -# include <linux/smp_lock.h> -# include <asm/atomic.h> -# include <asm/uaccess.h> -# include <linux/rwsem.h> -# include <linux/proc_fs.h> -# include <linux/file.h> -# include <linux/smp.h> -# include <linux/ctype.h> -# include <linux/compiler.h> -# ifdef HAVE_MM_INLINE -# include <linux/mm_inline.h> -# endif -# if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -# include <linux/kallsyms.h> -# include <linux/moduleparam.h> -# endif - -#include <libcfs/linux/portals_compat25.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define schedule_work schedule_task -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_TQUEUE((wq), 0, 0); \ - PREPARE_TQUEUE((wq), (cb), (cbdata)); \ -} 
while (0) - -#define PageUptodate Page_Uptodate -#define our_recalc_sigpending(current) recalc_sigpending(current) -#define num_online_cpus() smp_num_cpus -static inline void our_cond_resched(void) -{ - if (current->need_resched) - schedule (); -} -#define work_struct_t struct tq_struct -#define cfs_get_work_data(type,field,data) (data) -#else - -#ifdef HAVE_3ARGS_INIT_WORK - -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ -} while (0) - -#define cfs_get_work_data(type,field,data) (data) - -#else - -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_WORK((wq), (void *)(cb)); \ -} while (0) - -#define cfs_get_work_data(type,field,data) container_of(data,type,field) - -#endif - -#define wait_on_page wait_on_page_locked -#define our_recalc_sigpending(current) recalc_sigpending() -#define strtok(a,b) strpbrk(a, b) -static inline void our_cond_resched(void) -{ - cond_resched(); -} -#define work_struct_t struct work_struct - -#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ - -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock)) -#else -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif -#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0) - -#define LIBCFS_PANIC(msg) panic(msg) - -/* ------------------------------------------------------------------- */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) -#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) - -#define PORTAL_MODULE_USE MOD_INC_USE_COUNT -#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT -#else - -#define PORTAL_SYMBOL_REGISTER(x) -#define PORTAL_SYMBOL_UNREGISTER(x) - -#define PORTAL_SYMBOL_GET(x) symbol_get(x) -#define PORTAL_SYMBOL_PUT(x) symbol_put(x) - -#define PORTAL_MODULE_USE 
try_module_get(THIS_MODULE) -#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) - -#endif - -/******************************************************************************/ -/* Module parameter support */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define CFS_MODULE_PARM(name, t, type, perm, desc) \ - MODULE_PARM(name, t);\ - MODULE_PARM_DESC(name, desc) - -#else -# define CFS_MODULE_PARM(name, t, type, perm, desc) \ - module_param(name, type, perm);\ - MODULE_PARM_DESC(name, desc) -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) -# define CFS_SYSFS_MODULE_PARM 0 /* no sysfs module parameters */ -#else -# define CFS_SYSFS_MODULE_PARM 1 /* module parameters accessible via sysfs */ -#endif -/******************************************************************************/ - -#if (__GNUC__) -/* Use the special GNU C __attribute__ hack to have the compiler check the - * printf style argument string against the actual argument count and - * types. - */ -#ifdef printf -# warning printf has been defined as a macro... -# undef printf -#endif - -#endif /* __GNUC__ */ - -# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) -# define printf(format, b...) 
CDEBUG(D_OTHER, format , ## b) -# define time(a) CURRENT_TIME - -#ifndef num_possible_cpus -#define num_possible_cpus() NR_CPUS -#endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -#define i_size_read(a) ((a)->i_size) -#endif - -#else /* !__KERNEL__ */ -# include <stdio.h> -# include <stdlib.h> -#if defined(__CYGWIN__) -# include <cygwin-ioctl.h> -#else -# include <stdint.h> -#endif -# include <unistd.h> -# include <time.h> -# include <limits.h> -# include <errno.h> -# include <sys/ioctl.h> /* for _IOWR */ -#ifndef _IOWR -#include "ioctl.h" -#endif - -# define CFS_MODULE_PARM(name, t, type, perm, desc) -#define PORTAL_SYMBOL_GET(x) inter_module_get(#x) -#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) - -#endif /* End of !__KERNEL__ */ - -/******************************************************************************/ -/* Light-weight trace - * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 0 - -#define LWT_MEMORY (16<<20) - -#ifndef KLWT_SUPPORT -# if defined(__KERNEL__) -# if !defined(BITS_PER_LONG) -# error "BITS_PER_LONG not defined" -# endif -# elif !defined(__WORDSIZE) -# error "__WORDSIZE not defined" -# else -# define BITS_PER_LONG __WORDSIZE -# endif - -/* kernel hasn't defined this? */ -typedef struct { - long long lwte_when; - char *lwte_where; - void *lwte_task; - long lwte_p1; - long lwte_p2; - long lwte_p3; - long lwte_p4; -# if BITS_PER_LONG > 32 - long lwte_pad; -# endif -} lwt_event_t; -#endif /* !KLWT_SUPPORT */ - -#if LWT_SUPPORT -# ifdef __KERNEL__ -# if !KLWT_SUPPORT - -typedef struct _lwt_page { - struct list_head lwtp_list; - struct page *lwtp_page; - lwt_event_t *lwtp_events; -} lwt_page_t; - -typedef struct { - int lwtc_current_index; - lwt_page_t *lwtc_current_page; -} lwt_cpu_t; - -extern int lwt_enabled; -extern lwt_cpu_t lwt_cpus[]; - -/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. 
- * This stuff is meant for finding specific problems; it never stays in - * production code... */ - -#define LWTSTR(n) #n -#define LWTWHERE(f,l) f ":" LWTSTR(l) -#define LWT_EVENTS_PER_PAGE (CFS_PAGE_SIZE / sizeof (lwt_event_t)) - -#define LWT_EVENT(p1, p2, p3, p4) \ -do { \ - unsigned long flags; \ - lwt_cpu_t *cpu; \ - lwt_page_t *p; \ - lwt_event_t *e; \ - \ - if (lwt_enabled) { \ - local_irq_save (flags); \ - \ - cpu = &lwt_cpus[smp_processor_id()]; \ - p = cpu->lwtc_current_page; \ - e = &p->lwtp_events[cpu->lwtc_current_index++]; \ - \ - if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \ - cpu->lwtc_current_page = \ - list_entry (p->lwtp_list.next, \ - lwt_page_t, lwtp_list); \ - cpu->lwtc_current_index = 0; \ - } \ - \ - e->lwte_when = get_cycles(); \ - e->lwte_where = LWTWHERE(__FILE__,__LINE__); \ - e->lwte_task = current; \ - e->lwte_p1 = (long)(p1); \ - e->lwte_p2 = (long)(p2); \ - e->lwte_p3 = (long)(p3); \ - e->lwte_p4 = (long)(p4); \ - \ - local_irq_restore (flags); \ - } \ -} while (0) - -#endif /* !KLWT_SUPPORT */ - -extern int lwt_init (void); -extern void lwt_fini (void); -extern int lwt_lookup_string (int *size, char *knlptr, - char *usrptr, int usrsize); -extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, - void *user_ptr, int user_size); -# else /* __KERNEL__ */ -# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */ -# endif /* __KERNEL__ */ -#endif /* LWT_SUPPORT */ - -/* ------------------------------------------------------------------ */ - -#define IOCTL_LIBCFS_TYPE long - -#ifdef __CYGWIN__ -# ifndef BITS_PER_LONG -# if (~0UL) == 0xffffffffUL -# define BITS_PER_LONG 32 -# else -# define BITS_PER_LONG 64 -# endif -# endif -#endif - -#if BITS_PER_LONG > 32 -# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) -# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) -#else -# define LI_POISON ((int)0x5a5a5a5a) -# define 
LL_POISON ((long)0x5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a) -#endif - -/* this is a bit chunky */ - -#if defined(__KERNEL__) - #define _LWORDSIZE BITS_PER_LONG -#else - #define _LWORDSIZE __WORDSIZE -#endif - -#if (defined(__x86_64__) && (defined(__KERNEL__) || defined(CRAY_XT3))) || defined(HAVE_U64_LONG_LONG) -/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */ -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPF64 "L" -#elif (_LWORDSIZE == 32) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPF64 "L" -#elif (_LWORDSIZE == 64) -# define LPU64 "%lu" -# define LPD64 "%ld" -# define LPX64 "%#lx" -# define LPF64 "l" -#endif - -#ifdef HAVE_SIZE_T_LONG -# define LPSZ "%lu" -#else -# define LPSZ "%u" -#endif - -#ifdef HAVE_SSIZE_T_LONG -# define LPSSZ "%ld" -#else -# define LPSSZ "%d" -#endif - -#ifndef LPU64 -# error "No word size defined" -#endif - -#undef _LWORDSIZE - -#endif diff --git a/lnet/include/libcfs/linux/libcfs.h b/lnet/include/libcfs/linux/libcfs.h deleted file mode 100644 index c873c2fc2fdd82dc6401c56cbea00f8564537d80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/libcfs.h +++ /dev/null @@ -1,173 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_LIBCFS_H__ -#define __LIBCFS_LINUX_LIBCFS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef HAVE_ASM_TYPES_H -#include <asm/types.h> -#else -#include <libcfs/types.h> -#endif - -#include <stdarg.h> -#include <libcfs/linux/linux-time.h> -#include <libcfs/linux/linux-mem.h> -#include <libcfs/linux/linux-prim.h> -#include <libcfs/linux/linux-lock.h> -#include <libcfs/linux/linux-fs.h> -#include <libcfs/linux/linux-tcpip.h> - - -#ifdef __KERNEL__ -# include <linux/types.h> -# include <linux/time.h> -# include <asm/timex.h> -#else -# include <sys/types.h> -# include <sys/time.h> -# define do_gettimeofday(tv) gettimeofday(tv, NULL); -typedef unsigned long long cycles_t; -#endif - -#ifndef __KERNEL__ -/* Userpace byte flipping */ -# include <endian.h> -# include <byteswap.h> -# define __swab16(x) bswap_16(x) -# define __swab32(x) bswap_32(x) -# define __swab64(x) bswap_64(x) -# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0) -# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0) -# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0) -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define le16_to_cpu(x) (x) -# define cpu_to_le16(x) (x) -# define le32_to_cpu(x) (x) -# define cpu_to_le32(x) (x) -# define le64_to_cpu(x) (x) -# define cpu_to_le64(x) (x) - -# define be16_to_cpu(x) bswap_16(x) -# define cpu_to_be16(x) bswap_16(x) -# define be32_to_cpu(x) bswap_32(x) -# define cpu_to_be32(x) bswap_32(x) -# define be64_to_cpu(x) bswap_64(x) -# define cpu_to_be64(x) bswap_64(x) - -# else -# if __BYTE_ORDER == __BIG_ENDIAN -# define le16_to_cpu(x) bswap_16(x) -# define cpu_to_le16(x) bswap_16(x) -# define le32_to_cpu(x) bswap_32(x) -# define cpu_to_le32(x) bswap_32(x) -# define le64_to_cpu(x) bswap_64(x) -# define cpu_to_le64(x) bswap_64(x) - -# define be16_to_cpu(x) (x) -# define cpu_to_be16(x) (x) -# define be32_to_cpu(x) (x) -# define cpu_to_be32(x) (x) -# define be64_to_cpu(x) (x) -# define cpu_to_be64(x) (x) - -# else -# error "Unknown byte order" -# endif /* __BIG_ENDIAN */ -# endif /* __LITTLE_ENDIAN 
*/ -#endif /* ! __KERNEL__ */ - -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - -#ifdef __KERNEL__ -# include <linux/sched.h> /* THREAD_SIZE */ -#else -# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */ -# define THREAD_SIZE 8192 -# endif -#endif - -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#if defined(__KERNEL__) && !defined(__x86_64__) -# ifdef __ia64__ -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_frame_address(0) & \ - (THREAD_SIZE - 1))) -# endif /* __ia64__ */ - -#define __CHECK_STACK(file, func, line) \ -do { \ - unsigned long _stack = CDEBUG_STACK(); \ - \ - if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \ - libcfs_stack = _stack; \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \ - file, func, line, \ - "maximum lustre stack %lu\n", _stack); \ - /*panic("LBUG");*/ \ - } \ -} while (0) -#define CHECK_STACK() __CHECK_STACK(__FILE__, __func__, __LINE__) -#else /* !__KERNEL__ */ -#define __CHECK_STACK(X, Y, Z) do { } while(0) -#define CHECK_STACK() do { } while(0) -#define CDEBUG_STACK() (0L) -#endif /* __KERNEL__ */ - -/* initial pid */ -#define LUSTRE_LNET_PID 12345 - -#define ENTRY_NESTING_SUPPORT (1) -#define ENTRY_NESTING do {;} while (0) -#define EXIT_NESTING do {;} while (0) -#define __current_nesting_level() (0) - -/* - * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) - * - * Implementation is in linux-curproc.c - */ -#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm) - -#if defined(__KERNEL__) -#include <linux/capability.h> -typedef kernel_cap_t cfs_kernel_cap_t; -#else -typedef __u32 cfs_kernel_cap_t; -#endif - -#if 
defined(__KERNEL__) -/* - * No stack-back-tracing in Linux for now. - */ -struct cfs_stack_trace { -}; - -#ifndef WITH_WATCHDOG -#define WITH_WATCHDOG -#endif - -#endif - -#endif /* _LINUX_LIBCFS_H */ diff --git a/lnet/include/libcfs/linux/linux-fs.h b/lnet/include/libcfs/linux/linux-fs.h deleted file mode 100644 index 7573322dfe62b6b8d774cd9873d5c22a0f5727c0..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-fs.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_FS_H__ -#define __LIBCFS_LINUX_CFS_FS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/mount.h> -#else /* !__KERNEL__ */ -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <sys/mount.h> -#include <mntent.h> -#endif /* __KERNEL__ */ - -typedef struct file cfs_file_t; -typedef struct dentry cfs_dentry_t; -typedef struct dirent64 cfs_dirent_t; - -#ifdef __KERNEL__ -#define cfs_filp_size(f) (i_size_read((f)->f_dentry->d_inode)) -#define cfs_filp_poff(f) (&(f)->f_pos) - -/* - * XXX Do we need to parse flags and mode in cfs_filp_open? - */ -cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err); -#define cfs_filp_close(f) filp_close(f, NULL) -#define cfs_filp_read(fp, buf, size, pos) (fp)->f_op->read((fp), (buf), (size), pos) -#define cfs_filp_write(fp, buf, size, pos) (fp)->f_op->write((fp), (buf), (size), pos) -#define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1) - -#define cfs_get_file(f) get_file(f) -#define cfs_put_file(f) fput(f) -#define cfs_file_count(f) file_count(f) - -typedef struct file_lock cfs_flock_t; -#define cfs_flock_type(fl) ((fl)->fl_type) -#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0) -#define cfs_flock_pid(fl) ((fl)->fl_pid) -#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0) -#define cfs_flock_start(fl) ((fl)->fl_start) -#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0) -#define cfs_flock_end(fl) ((fl)->fl_end) -#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0) - -ssize_t cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset); - -#endif - -#endif diff --git a/lnet/include/libcfs/linux/linux-lock.h b/lnet/include/libcfs/linux/linux-lock.h deleted file mode 100644 index 4b51d1b450b0e320d6d5d9d1beaacf444226ecae..0000000000000000000000000000000000000000 --- 
a/lnet/include/libcfs/linux/linux-lock.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_LOCK_H__ -#define __LIBCFS_LINUX_CFS_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <linux/smp_lock.h> - -/* - * IMPORTANT !!!!!!!! - * - * All locks' declaration are not guaranteed to be initialized, - * Althought some of they are initialized in Linux. All locks - * declared by CFS_DECL_* should be initialized explicitly. 
- */ - - -/* - * spin_lock (use Linux kernel's primitives) - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - */ - -/* - * rw_semaphore (use Linux kernel's primitives) - * - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ - -/* - * rwlock_t (use Linux kernel's primitives) - * - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ - -/* - * mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ -#define init_mutex(x) init_MUTEX(x) -#define init_mutex_locked(x) init_MUTEX_LOCKED(x) -#define mutex_up(x) up(x) -#define mutex_down(x) down(x) -#define mutex_down_trylock(x) down_trylock(x) - -/* - * completion (use Linux kernel's primitives) - * - * - init_complition(c) - * - complete(c) - * - wait_for_completion(c) - */ - -/* __KERNEL__ */ -#else - -#include "../user-lock.h" - -/* __KERNEL__ */ -#endif -#endif diff --git a/lnet/include/libcfs/linux/linux-mem.h b/lnet/include/libcfs/linux/linux-mem.h deleted file mode 100644 index fa4ba3d3ae72312fd6efc8c74ec9fd276a3bbd7d..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-mem.h +++ /dev/null @@ -1,129 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_MEM_H__ -#define __LIBCFS_LINUX_CFS_MEM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -# include <linux/mm.h> -# include <linux/vmalloc.h> -# include <linux/pagemap.h> -# include <linux/slab.h> -# ifdef HAVE_MM_INLINE -# include <linux/mm_inline.h> -# endif - -typedef struct page cfs_page_t; -#define CFS_PAGE_SIZE PAGE_CACHE_SIZE -#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) - -static inline void *cfs_page_address(cfs_page_t *page) -{ - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - return page_address(page); -} - -static inline void *cfs_kmap(cfs_page_t *page) -{ - return kmap(page); -} - -static inline void cfs_kunmap(cfs_page_t *page) -{ - kunmap(page); -} - -static inline void cfs_get_page(cfs_page_t *page) -{ - get_page(page); -} - -static inline int cfs_page_count(cfs_page_t *page) -{ - return page_count(page); -} - -#define cfs_page_index(p) ((p)->index) - -/* - * Memory allocator - * XXX Liang: move these declare to public file - */ -extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); -extern void cfs_free(void *addr); - -extern void *cfs_alloc_large(size_t nr_bytes); -extern void cfs_free_large(void *addr); - -extern cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order); -extern void __cfs_free_pages(cfs_page_t *page, unsigned int order); - -#define cfs_alloc_page(flags) cfs_alloc_pages(flags, 0) -#define __cfs_free_page(page) __cfs_free_pages(page, 0) -#define cfs_free_page(p) __free_pages(p, 0) - -/* - * In Linux there is no way to determine whether current execution context is - * blockable. 
- */ -#define CFS_ALLOC_ATOMIC_TRY CFS_ALLOC_ATOMIC - -/* - * SLAB allocator - * XXX Liang: move these declare to public file - */ -#ifdef HAVE_KMEM_CACHE -typedef struct kmem_cache cfs_mem_cache_t; -#else -typedef kmem_cache_t cfs_mem_cache_t; -#endif -extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long); -extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); -extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); -extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); - -/* - */ -#define CFS_DECL_MMSPACE mm_segment_t __oldfs -#define CFS_MMSPACE_OPEN do { __oldfs = get_fs(); set_fs(get_ds());} while(0) -#define CFS_MMSPACE_CLOSE set_fs(__oldfs) - -#else /* !__KERNEL__ */ -#ifdef HAVE_ASM_PAGE_H -#include <asm/page.h> /* needed for PAGE_SIZE - rread */ -#endif - -#include <libcfs/user-prim.h> -/* __KERNEL__ */ -#endif - -#endif /* __LINUX_CFS_MEM_H__ */ diff --git a/lnet/include/libcfs/linux/linux-prim.h b/lnet/include/libcfs/linux/linux-prim.h deleted file mode 100644 index 705499e7ddf77761c647b60cd3d88f132b04c795..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-prim.h +++ /dev/null @@ -1,311 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_PRIM_H__ -#define __LIBCFS_LINUX_CFS_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/version.h> -#include <linux/proc_fs.h> -#include <linux/mm.h> -#include <linux/timer.h> -#include <linux/signal.h> -#include <linux/sched.h> - -#include <linux/miscdevice.h> -#include <libcfs/linux/portals_compat25.h> -#include <asm/div64.h> - -#include <libcfs/linux/linux-time.h> - -/* - * Pseudo device register - */ -typedef struct miscdevice cfs_psdev_t; -#define cfs_psdev_register(dev) misc_register(dev) -#define cfs_psdev_deregister(dev) misc_deregister(dev) - -/* - * Sysctl register - */ -typedef struct ctl_table cfs_sysctl_table_t; -typedef struct ctl_table_header cfs_sysctl_table_header_t; - -#ifdef HAVE_2ARGS_REGISTER_SYSCTL -#define cfs_register_sysctl_table(t, a) register_sysctl_table(t, a) -#else -#define cfs_register_sysctl_table(t, a) register_sysctl_table(t) -#endif -#define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t) - -/* - * Symbol register - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define cfs_symbol_register(s, p) inter_module_register(s, THIS_MODULE, p) -#define cfs_symbol_unregister(s) inter_module_unregister(s) -#define cfs_symbol_get(s) inter_module_get(s) -#define cfs_symbol_put(s) inter_module_put(s) -#define cfs_module_get() MOD_INC_USE_COUNT -#define cfs_module_put() MOD_DEC_USE_COUNT -#else -#define cfs_symbol_register(s, p) do {} while(0) -#define cfs_symbol_unregister(s) do {} while(0) -#define cfs_symbol_get(s) symbol_get(s) -#define 
cfs_symbol_put(s) symbol_put(s) -#define cfs_module_get() try_module_get(THIS_MODULE) -#define cfs_module_put() module_put(THIS_MODULE) -#endif - -/* - * Proc file system APIs - */ -typedef read_proc_t cfs_read_proc_t; -typedef write_proc_t cfs_write_proc_t; -typedef struct proc_dir_entry cfs_proc_dir_entry_t; -#define cfs_create_proc_entry(n, m, p) create_proc_entry(n, m, p) -#define cfs_free_proc_entry(e) free_proc_entry(e) -#define cfs_remove_proc_entry(n, e) remove_proc_entry(n, e) - -/* - * Wait Queue - */ -#define CFS_TASK_INTERRUPTIBLE TASK_INTERRUPTIBLE -#define CFS_TASK_UNINT TASK_UNINTERRUPTIBLE - -typedef wait_queue_t cfs_waitlink_t; -typedef wait_queue_head_t cfs_waitq_t; - -typedef long cfs_task_state_t; - -#define cfs_waitq_init(w) init_waitqueue_head(w) -#define cfs_waitlink_init(l) init_waitqueue_entry(l, current) -#define cfs_waitq_add(w, l) add_wait_queue(w, l) -#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l) -#define cfs_waitq_forward(l, w) do {} while(0) -#define cfs_waitq_del(w, l) remove_wait_queue(w, l) -#define cfs_waitq_active(w) waitqueue_active(w) -#define cfs_waitq_signal(w) wake_up(w) -#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n) -#define cfs_waitq_broadcast(w) wake_up_all(w) -#define cfs_waitq_wait(l, s) schedule() -#define cfs_waitq_timedwait(l, s, t) schedule_timeout(t) -#define cfs_schedule_timeout(s, t) schedule_timeout(t) -#define cfs_schedule() schedule() - -/* Kernel thread */ -typedef int (*cfs_thread_t)(void *); - -static inline int cfs_kernel_thread(int (*fn)(void *), - void *arg, unsigned long flags) -{ - void *orig_info = current->journal_info; - int rc; - - current->journal_info = NULL; - rc = kernel_thread(fn, arg, flags); - current->journal_info = orig_info; - return rc; -} - - -/* - * Task struct - */ -typedef struct task_struct cfs_task_t; -#define cfs_current() current -#define cfs_task_lock(t) task_lock(t) -#define cfs_task_unlock(t) task_unlock(t) -#define CFS_DECL_JOURNAL_DATA void 
*journal_info -#define CFS_PUSH_JOURNAL do { \ - journal_info = current->journal_info; \ - current->journal_info = NULL; \ - } while(0) -#define CFS_POP_JOURNAL do { \ - current->journal_info = journal_info; \ - } while(0) - -/* Module interfaces */ -#define cfs_module(name, version, init, fini) \ -module_init(init); \ -module_exit(fini) - -/* - * Signal - */ -typedef sigset_t cfs_sigset_t; - -/* - * Timer - */ -typedef struct timer_list cfs_timer_t; -typedef void (*timer_func_t)(unsigned long); - -#define cfs_init_timer(t) init_timer(t) - -static inline void cfs_timer_init(cfs_timer_t *t, void (*func)(unsigned long), void *arg) -{ - init_timer(t); - t->function = (timer_func_t)func; - t->data = (unsigned long)arg; -} - -static inline void cfs_timer_done(cfs_timer_t *t) -{ - return; -} - -static inline void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline) -{ - mod_timer(t, deadline); -} - -static inline void cfs_timer_disarm(cfs_timer_t *t) -{ - del_timer(t); -} - -static inline int cfs_timer_is_armed(cfs_timer_t *t) -{ - return timer_pending(t); -} - -static inline cfs_time_t cfs_timer_deadline(cfs_timer_t *t) -{ - return t->expires; -} - - -/* deschedule for a bit... */ -static inline void cfs_pause(cfs_duration_t ticks) -{ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ticks); -} - -#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */ -#define __wait_event_timeout(wq, condition, timeout, ret) \ -do { \ - int __ret = 0; \ - if (!(condition)) { \ - wait_queue_t __wait; \ - unsigned long expire; \ - \ - init_waitqueue_entry(&__wait, current); \ - expire = timeout + jiffies; \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - if (jiffies > expire) { \ - ret = jiffies - expire; \ - break; \ - } \ - schedule_timeout(timeout); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ - } \ -} while (0) -/* - retval == 0; condition met; we're good. 
- retval > 0; timed out. -*/ -#define cfs_waitq_wait_event_timeout(wq, condition, timeout) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __wait_event_timeout(wq, condition, timeout, __ret); \ - __ret; \ -}) -#else -#define cfs_waitq_wait_event_timeout wait_event_timeout -#endif - -#ifndef wait_event_interruptible_timeout /* Only for RHEL3 2.4.21 kernel */ -#define __wait_event_interruptible_timeout(wq, condition, timeout, ret) \ -do { \ - int __ret = 0; \ - if (!(condition)) { \ - wait_queue_t __wait; \ - unsigned long expire; \ - \ - init_waitqueue_entry(&__wait, current); \ - expire = timeout + jiffies; \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (jiffies > expire) { \ - ret = jiffies - expire; \ - break; \ - } \ - if (!signal_pending(current)) { \ - schedule_timeout(timeout); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ - } \ -} while (0) - -/* - retval == 0; condition met; we're good. - retval < 0; interrupted by signal. - retval > 0; timed out. 
-*/ -#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __wait_event_interruptible_timeout(wq, condition, \ - timeout, __ret); \ - __ret; \ -}) -#else -#define cfs_waitq_wait_event_interruptible_timeout wait_event_interruptible_timeout -#endif - -#else /* !__KERNEL__ */ - -typedef struct proc_dir_entry cfs_proc_dir_entry_t; -#include "../user-prim.h" - -#endif /* __KERNEL__ */ - -#endif diff --git a/lnet/include/libcfs/linux/linux-tcpip.h b/lnet/include/libcfs/linux/linux-tcpip.h deleted file mode 100644 index fb2ac932eb35c5fa7198676f2c19b5924b53542c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-tcpip.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_TCP_H__ -#define __LIBCFS_LINUX_CFS_TCP_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <net/sock.h> - -typedef struct socket cfs_socket_t; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72)) -# define sk_allocation allocation -# define sk_data_ready data_ready -# define sk_write_space write_space -# define sk_user_data user_data -# define sk_prot prot -# define sk_sndbuf sndbuf -# define sk_rcvbuf rcvbuf -# define sk_socket socket -# define sk_sleep sleep -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -# define sk_wmem_queued wmem_queued -# define sk_err err -# define sk_route_caps route_caps -#endif - -#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf) -#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued) -#define SOCK_ERROR(so) ((so)->sk->sk_err) -#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags) - -#else /* !__KERNEL__ */ - -#include "../user-tcpip.h" - -#endif /* __KERNEL__ */ - -#endif diff --git a/lnet/include/libcfs/linux/linux-time.h b/lnet/include/libcfs/linux/linux-time.h deleted file mode 100644 index 3d4cdf54201bb0e7263938ad2bd03bb75eb0792c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-time.h +++ /dev/null @@ -1,327 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. 
- * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for Linux (kernel and user-level). - * - */ - -#ifndef __LIBCFS_LINUX_LINUX_TIME_H__ -#define __LIBCFS_LINUX_LINUX_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). - * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION 1000000 - -#ifdef __KERNEL__ -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> 
-#include <linux/kernel.h> -#include <linux/version.h> -#include <linux/time.h> -#include <asm/div64.h> - -#include <libcfs/linux/portals_compat25.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -/* - * old kernels---CURRENT_TIME is struct timeval - */ -typedef struct timeval cfs_fs_time_t; - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - *v = *t; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - s->tv_sec = t->tv_sec; - s->tv_nsec = t->tv_usec * 1000; -} - -/* - * internal helper function used by cfs_fs_time_before*() - */ -static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return (unsigned long long)t->tv_sec * ONE_MILLION + t->tv_usec; -} - -#define CURRENT_KERN_TIME xtime - -#else -/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */ - -/* - * post 2.5 kernels. - */ - -#include <linux/jiffies.h> - -typedef struct timespec cfs_fs_time_t; - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - v->tv_sec = t->tv_sec; - v->tv_usec = t->tv_nsec / 1000; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - *s = *t; -} - -/* - * internal helper function used by cfs_fs_time_before*() - */ -static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec; -} - -#define CURRENT_KERN_TIME CURRENT_TIME - -/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */ -#endif - -/* - * Generic kernel stuff - */ - -typedef unsigned long cfs_time_t; /* jiffies */ -typedef long cfs_duration_t; - - -static inline cfs_time_t cfs_time_current(void) -{ - return jiffies; -} - -static inline time_t cfs_time_current_sec(void) -{ - return CURRENT_SECONDS; -} - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return t + d; -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return t1 - t2; -} - -static inline int 
cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return time_before(t1, t2); -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return time_before_eq(t1, t2); -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - *t = CURRENT_KERN_TIME; -} - -static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return t->tv_sec; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2); -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2); -} - -#if 0 -static inline cfs_duration_t cfs_duration_build(int64_t nano) -{ -#if (BITS_PER_LONG == 32) - /* We cannot use do_div(t, ONE_BILLION), do_div can only process - * 64 bits n and 32 bits base */ - int64_t t = nano * HZ; - do_div(t, 1000); - do_div(t, 1000000); - return (cfs_duration_t)t; -#else - return (nano * HZ / ONE_BILLION); -#endif -} -#endif - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return ((cfs_duration_t)seconds) * HZ; -} - -static inline time_t cfs_duration_sec(cfs_duration_t d) -{ - return d / HZ; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ -#if (BITS_PER_LONG == 32) && (HZ > 4096) - __u64 t; - - s->tv_sec = d / HZ; - t = (d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION; - do_div(t, HZ); - s->tv_usec = t; -#else - s->tv_sec = d / HZ; - s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION) / HZ; -#endif -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ -#if (BITS_PER_LONG == 32) - __u64 t; - - s->tv_sec = d / HZ; - t = (d - s->tv_sec * HZ) * ONE_BILLION; - do_div(t, HZ); - s->tv_nsec = t; -#else - s->tv_sec = d / HZ; - s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ; -#endif -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) - -#define cfs_time_current_64 get_jiffies_64 - -static inline 
__u64 cfs_time_add_64(__u64 t, __u64 d) -{ - return t + d; -} - -static inline __u64 cfs_time_shift_64(int seconds) -{ - return cfs_time_add_64(cfs_time_current_64(), - cfs_time_seconds(seconds)); -} - -static inline int cfs_time_before_64(__u64 t1, __u64 t2) -{ - return (__s64)t2 - (__s64)t1 > 0; -} - -static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2) -{ - return (__s64)t2 - (__s64)t1 >= 0; -} - -#else -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift -#define cfs_time_before_64 cfs_time_before -#define cfs_time_beforeq_64 cfs_time_beforeq -#endif - -/* - * One jiffy - */ -#define CFS_TICK (1) - -#define CFS_TIME_T "%lu" -#define CFS_DURATION_T "%ld" - -#else /* !__KERNEL__ */ - -/* - * Liblustre. time(2) based implementation. - */ - -#define CFS_TIME_T "%lu" - -#include <libcfs/user-time.h> - -#endif /* __KERNEL__ */ - -/* __LIBCFS_LINUX_LINUX_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/linux/lltrace.h b/lnet/include/libcfs/linux/lltrace.h deleted file mode 100644 index 1ddd03d41a8ec6f6349ddcbd227b520e360dedc9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/lltrace.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_LLTRACE_H__ -#define __LIBCFS_LINUX_LLTRACE_H__ - -#ifndef __LIBCFS_LLTRACE_H__ -#error Do not #include this file directly. 
#include <libcfs/lltrace.h> instead -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/time.h> -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <lnet/lnetctl.h> -#include <linux/limits.h> -#include <asm/page.h> -#include <linux/version.h> - -#endif diff --git a/lnet/include/libcfs/linux/portals_compat25.h b/lnet/include/libcfs/linux/portals_compat25.h deleted file mode 100644 index 2d6b782e0ab543cff6944df5f6cb3e40b998c88c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/portals_compat25.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__ -#define __LIBCFS_LINUX_PORTALS_COMPAT_H__ - -// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved -#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG -# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) -# define SIGNAL_MASK_ASSERT() \ - LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC) -# else -# define SIGNAL_MASK_ASSERT() \ - LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC) -# endif -#else -# define SIGNAL_MASK_ASSERT() -#endif -// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sighand->siglock, flags) -# define USERMODEHELPER(path, argv, envp) \ - call_usermodehelper(path, argv, envp, 1) -# define RECALC_SIGPENDING recalc_sigpending() -# define CLEAR_SIGPENDING clear_tsk_thread_flag(current, \ - TIF_SIGPENDING) -# define CURRENT_SECONDS get_seconds() -# define smp_num_cpus 
num_online_cpus() - - -#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */ - -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sighand->siglock, flags) -# define USERMODEHELPER(path, argv, envp) \ - call_usermodehelper(path, argv, envp) -# define RECALC_SIGPENDING recalc_sigpending() -# define CLEAR_SIGPENDING (current->sigpending = 0) -# define CURRENT_SECONDS CURRENT_TIME -# define wait_event_interruptible_exclusive(wq, condition) \ - wait_event_interruptible(wq, condition) - -#else /* 2.4.x */ - -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sigmask_lock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sigmask_lock, flags) -# define USERMODEHELPER(path, argv, envp) \ - call_usermodehelper(path, argv, envp) -# define RECALC_SIGPENDING recalc_sigpending(current) -# define CLEAR_SIGPENDING (current->sigpending = 0) -# define CURRENT_SECONDS CURRENT_TIME -# define wait_event_interruptible_exclusive(wq, condition) \ - wait_event_interruptible(wq, condition) - -#endif - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) -#define UML_PID(tsk) ((tsk)->thread.extern_pid) -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid) -#else -#define UML_PID(tsk) ((tsk)->pid) -#endif - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define THREAD_NAME(comm, len, fmt, a...) \ - snprintf(comm, len,fmt"|%d", ## a, UML_PID(current)) -#else -# define THREAD_NAME(comm, len, fmt, a...) 
\ - snprintf(comm, len, fmt, ## a) -#endif - -#ifdef HAVE_PAGE_LIST -/* 2.4 alloc_page users can use page->list */ -#define PAGE_LIST_ENTRY list -#define PAGE_LIST(page) ((page)->list) -#else -/* 2.6 alloc_page users can use page->lru */ -#define PAGE_LIST_ENTRY lru -#define PAGE_LIST(page) ((page)->lru) -#endif - -#ifndef HAVE_CPU_ONLINE -#define cpu_online(cpu) ((1<<cpu) & (cpu_online_map)) -#endif -#ifndef HAVE_CPUMASK_T -typedef unsigned long cpumask_t; -#define cpu_set(cpu, map) set_bit(cpu, &(map)) -#define cpus_clear(map) memset(&(map), 0, sizeof(cpumask_t)) -#endif - -#ifndef __user -#define __user -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) -#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ - proc_dointvec(table, write, filp, buffer, lenp) -#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \ - proc_dostring(table, write, filp, buffer, lenp) -#define LL_PROC_PROTO(name) \ - name(cfs_sysctl_table_t *table, int write, struct file *filp, \ - void __user *buffer, size_t *lenp) -#define DECLARE_LL_PROC_PPOS_DECL loff_t *ppos = &filp->f_pos -#else -#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ - proc_dointvec(table, write, filp, buffer, lenp, ppos); -#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \ - proc_dostring(table, write, filp, buffer, lenp, ppos); -#define LL_PROC_PROTO(name) \ - name(cfs_sysctl_table_t *table, int write, struct file *filp, \ - void __user *buffer, size_t *lenp, loff_t *ppos) -#define DECLARE_LL_PROC_PPOS_DECL -#endif - -#endif /* _PORTALS_COMPAT_H */ diff --git a/lnet/include/libcfs/linux/portals_utils.h b/lnet/include/libcfs/linux/portals_utils.h deleted file mode 100644 index 4e76856b5c003dce67309cd0181f30162321a8f9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/portals_utils.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef __LIBCFS_LINUX_PORTALS_UTILS_H__ -#define __LIBCFS_LINUX_PORTALS_UTILS_H__ - -#ifndef 
__LIBCFS_PORTALS_UTILS_H__ -#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead -#endif - -#ifdef __KERNEL__ -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/wait.h> -#include <linux/smp_lock.h> -#include <linux/poll.h> -#include <linux/random.h> - -#include <asm/unistd.h> -#include <asm/semaphore.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# include <linux/tqueue.h> -#else /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) */ -# include <linux/workqueue.h> -#endif -#include <libcfs/linux/linux-mem.h> -#include <libcfs/linux/linux-prim.h> -#else /* !__KERNEL__ */ - -#include <endian.h> -#include <libcfs/list.h> - -#ifdef HAVE_LINUX_VERSION_H -# include <linux/version.h> - -# if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define BUG() /* workaround for module.h includes */ -# include <linux/module.h> -# endif -#endif /* !HAVE_LINUX_VERSION_H */ - -#ifndef __CYGWIN__ -# include <sys/syscall.h> -#else /* __CYGWIN__ */ -# include <windows.h> -# include <windef.h> -# include <netinet/in.h> -#endif /* __CYGWIN__ */ - -#endif /* !__KERNEL__ */ -#endif diff --git a/lnet/include/libcfs/list.h b/lnet/include/libcfs/list.h deleted file mode 100644 index 799d853667d53163267123871c779f940f1c6055..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/list.h +++ /dev/null @@ -1,446 +0,0 @@ -#ifndef __LIBCFS_LIST_H__ -#define __LIBCFS_LIST_H__ - -#if defined (__linux__) && defined(__KERNEL__) - -#include <linux/list.h> - -#define CFS_LIST_HEAD_INIT(n) LIST_HEAD_INIT(n) -#define CFS_LIST_HEAD(n) LIST_HEAD(n) -#define CFS_INIT_LIST_HEAD(p) INIT_LIST_HEAD(p) - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define CFS_HLIST_HEAD_INIT HLIST_HEAD_INIT -#define CFS_HLIST_HEAD(n) HLIST_HEAD(n) -#define CFS_INIT_HLIST_HEAD(p) INIT_HLIST_HEAD(p) -#define CFS_INIT_HLIST_NODE(p) INIT_HLIST_NODE(p) -#endif - -#else /* !defined (__linux__) || 
!defined(__KERNEL__) */ - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -#ifndef __WINNT__ -#define prefetch(a) ((void)a) -#else -#define prefetch(a) ((void *)a) -#endif - -struct list_head { - struct list_head *next, *prev; -}; - -typedef struct list_head list_t; - -#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) } - -#define CFS_LIST_HEAD(name) \ - struct list_head name = CFS_LIST_HEAD_INIT(name) - -#define CFS_INIT_LIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ -} while (0) - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head * new, - struct list_head * prev, - struct list_head * next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. 
- * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - CFS_INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - * - * This is not safe to use if @list is already on the same list as @head. - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - * - * This is not safe to use if @list is already on the same list as @head. - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. 
- */ -static inline int list_empty(struct list_head *head) -{ - return head->next == head; -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - CFS_INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next, prefetch(pos->next); pos != (head); \ - pos = pos->next, prefetch(pos->next)) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop counter. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. 
- */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/* - * Double linked lists with a single pointer list head. - * Mostly useful for hash tables where the two pointer list head is - * too wasteful. - * You lose the ability to access the tail in O(1). - */ - -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -/* - * "NULL" might not be defined at this point - */ -#ifdef NULL -#define NULL_P NULL -#else -#define NULL_P ((void *)0) -#endif - -#define CFS_HLIST_HEAD_INIT { .first = NULL_P } -#define CFS_HLIST_HEAD(name) struct hlist_head name = { .first = NULL_P } -#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P) -#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P) - -#define HLIST_HEAD_INIT CFS_HLIST_HEAD_INIT -#define HLIST_HEAD(n) CFS_HLIST_HEAD(n) -#define INIT_HLIST_HEAD(p) CFS_INIT_HLIST_HEAD(p) -#define INIT_HLIST_NODE(p) CFS_INIT_HLIST_NODE(p) - -static inline int hlist_unhashed(const struct hlist_node *h) -{ - return !h->pprev; -} - -static inline int hlist_empty(const struct hlist_head *h) -{ - return !h->first; -} - -static inline void __hlist_del(struct hlist_node *n) -{ - struct hlist_node *next = n->next; - struct hlist_node **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void hlist_del(struct hlist_node *n) -{ - __hlist_del(n); -} - -static inline void hlist_del_init(struct hlist_node *n) -{ - if (n->pprev) { - __hlist_del(n); - INIT_HLIST_NODE(n); - } -} - -static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -/* next must be != NULL */ -static inline void hlist_add_before(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - 
n->next = next; - next->pprev = &n->next; - *(n->pprev) = n; -} - -static inline void hlist_add_after(struct hlist_node *n, - struct hlist_node *next) -{ - next->next = n->next; - n->next = next; - next->pprev = &n->next; - - if(next->next) - next->next->pprev = &next->next; -} - -#define hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ - pos = pos->next) - -#define hlist_for_each_safe(pos, n, head) \ - for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ - pos = n) - -/** - * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_from - iterate over a hlist continuing from existing point - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @member: the name of the hlist_node within the struct. 
- */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @n: another &struct hlist_node to use as temporary storage - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) - -#endif /* __linux__ && __KERNEL__ */ - -#ifndef list_for_each_prev -/** - * list_for_each_prev - iterate over a list in reverse order - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ - pos = pos->prev, prefetch(pos->prev)) - -#endif /* list_for_each_prev */ - -#ifndef list_for_each_entry -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop counter. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - prefetch(pos->member.next); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member), \ - prefetch(pos->member.next)) -#endif /* list_for_each_entry */ - -#ifndef list_for_each_entry_reverse -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop counter. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. 
- */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) -#endif /* list_for_each_entry_reverse */ - -#ifndef list_for_each_entry_safe -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop counter. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) -#endif /* list_for_each_entry_safe */ - -#endif /* __LIBCFS_LUSTRE_LIST_H__ */ diff --git a/lnet/include/libcfs/lltrace.h b/lnet/include/libcfs/lltrace.h deleted file mode 100644 index dbeae911d2e24b26326a422797eaa3f7d48882ea..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/lltrace.h +++ /dev/null @@ -1,167 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Compile with: - * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl - */ -#ifndef __LIBCFS_LLTRACE_H__ -#define __LIBCFS_LLTRACE_H__ - -#if defined(__linux__) -#include <libcfs/linux/lltrace.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/lltrace.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/lltrace.h> -#else -#error Unsupported Operating System -#endif - -static inline int ltrace_write_file(char* fname) -{ - char* argv[3]; - - argv[0] = "debug_kernel"; - argv[1] = fname; - argv[2] = "1"; - - fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); - - return jt_dbg_debug_kernel(3, argv); -} - 
-static inline int ltrace_clear() -{ - char* argv[1]; - - argv[0] = "clear"; - - fprintf(stderr, "[ptlctl] %s\n", argv[0]); - - return jt_dbg_clear_debug_buf(1, argv); -} - -static inline int ltrace_mark(int indent_level, char* text) -{ - char* argv[2]; - char mark_buf[PATH_MAX]; - - snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text); - - argv[0] = "mark"; - argv[1] = mark_buf; - return jt_dbg_mark_debug_buf(2, argv); -} - -static inline int ltrace_applymasks() -{ - char* argv[2]; - argv[0] = "list"; - argv[1] = "applymasks"; - - fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); - - return jt_dbg_list(2, argv); -} - - -static inline int ltrace_filter(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "filter"; - argv[1] = subsys_or_mask; - return jt_dbg_filter(2, argv); -} - -static inline int ltrace_show(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "show"; - argv[1] = subsys_or_mask; - return jt_dbg_show(2, argv); -} - -static inline int ltrace_start() -{ - int rc = 0; - dbg_initialize(0, NULL); -#ifdef LNET_DEV_ID - rc = register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH, - LNET_DEV_MAJOR, LNET_DEV_MINOR); -#endif - ltrace_filter("class"); - ltrace_filter("nal"); - ltrace_filter("portals"); - - ltrace_show("all_types"); - ltrace_filter("trace"); - ltrace_filter("malloc"); - ltrace_filter("net"); - ltrace_filter("page"); - ltrace_filter("other"); - ltrace_filter("info"); - ltrace_applymasks(); - - return rc; -} - - -static inline void ltrace_stop() -{ -#ifdef LNET_DEV_ID - unregister_ioc_dev(LNET_DEV_ID); -#endif -} - -static inline int not_uml() -{ - /* Return Values: - * 0 when run under UML - * 1 when run on host - * <0 when lookup failed - */ - struct stat buf; - int rc = stat("/dev/ubd", &buf); - rc = ((rc<0) && (errno == ENOENT)) ? 
1 : rc; - if (rc<0) { - fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno)); - rc = 1; /* Assume host */ - } - return rc; -} - -#define LTRACE_MAX_NOB 256 -static inline void ltrace_add_processnames(char* fname) -{ - char cmdbuf[LTRACE_MAX_NOB]; - struct timeval tv; - struct timezone tz; - int nob; - int underuml = !not_uml(); - - gettimeofday(&tv, &tz); - - nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); - - /* Careful - these format strings need to match the CDEBUG - * formats in portals/linux/debug.c EXACTLY - */ - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ", - S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec); - - if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d | %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L); - } - else { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); - } - - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); - system(cmdbuf); -} - -#endif diff --git a/lnet/include/libcfs/portals_utils.h b/lnet/include/libcfs/portals_utils.h deleted file mode 100644 index b79eb7eb00efa0d891b951f270bec92ab70333e4..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/portals_utils.h +++ /dev/null @@ -1,21 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_PORTALS_UTILS_H__ -#define __LIBCFS_PORTALS_UTILS_H__ - -/* - * portals_utils.h - * - */ -#if defined(__linux__) -#include <libcfs/linux/portals_utils.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/portals_utils.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/portals_utils.h> -#else -#error Unsupported Operating System -#endif - -#endif diff --git a/lnet/include/libcfs/types.h b/lnet/include/libcfs/types.h deleted file mode 100755 index 
71dd7fb1e1ca3e135e9762c3160812687f8b062a..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _LIBCFS_TYPES_H -#define _LIBCFS_TYPES_H - -/* - * This file was inttroduced to resolve XT3 (Catamount) build issues. - * The orignal idea was to move <lustre/types.h> here however at - * the time of this writing - * it's unclear what external dependencies are tied - * to that file (It's not just some source file #including it) - * there is some build/packaging infrastructure that includes it. - * Hopefully that will be resolved shortly, that file will - * be removed, its contents copied here and this comment can be deleted. - */ - -#include <lustre/types.h> - -#endif diff --git a/lnet/include/libcfs/user-bitops.h b/lnet/include/libcfs/user-bitops.h deleted file mode 100644 index d2eea0edaf35b6b478cef7d6183a50ad4cb4155b..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-bitops.h +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. 
- * - */ - -#ifndef __LIBCFS_USER_BITOPS_H__ -#define __LIBCFS_USER_BITOPS_H__ - -/* test if bit nr is set in bitmap addr; returns previous value of bit nr */ -static __inline__ int set_bit(int nr, unsigned long * addr) -{ - long mask; - - addr += nr / BITS_PER_LONG; - mask = 1UL << (nr & (BITS_PER_LONG - 1)); - nr = (mask & *addr) != 0; - *addr |= mask; - return nr; -} - -/* clear bit nr in bitmap addr; returns previous value of bit nr*/ -static __inline__ int clear_bit(int nr, unsigned long * addr) -{ - long mask; - - addr += nr / BITS_PER_LONG; - mask = 1UL << (nr & (BITS_PER_LONG - 1)); - nr = (mask & *addr) != 0; - *addr &= ~mask; - return nr; -} - -static __inline__ int test_bit(int nr, const unsigned long * addr) -{ - return ((1UL << (nr & (BITS_PER_LONG - 1))) & ((addr)[nr / BITS_PER_LONG])) != 0; -} - -/* using binary seach */ -static __inline__ unsigned long __ffs(long data) -{ - int pos = 0; - -#if BITS_PER_LONG == 64 - if ((data & 0xFFFFFFFF) == 0) { - pos += 32; - data >>= 32; - } -#endif - if ((data & 0xFFFF) == 0) { - pos += 16; - data >>= 16; - } - if ((data & 0xFF) == 0) { - pos += 8; - data >>= 8; - } - if ((data & 0xF) == 0) { - pos += 4; - data >>= 4; - } - if ((data & 0x3) == 0) { - pos += 2; - data >>= 2; - } - if ((data & 0x1) == 0) - pos += 1; - - return pos; -} - -#define __ffz(x) __ffs(~(x)) - -unsigned long find_next_bit(unsigned long *addr, - unsigned long size, unsigned long offset); - -unsigned long find_next_zero_bit(unsigned long *addr, - unsigned long size, unsigned long offset); - -#define find_first_bit(addr,size) (find_next_bit((addr),(size),0)) -#define find_first_zero_bit(addr,size) (find_next_zero_bit((addr),(size),0)) - -#endif diff --git a/lnet/include/libcfs/user-lock.h b/lnet/include/libcfs/user-lock.h deleted file mode 100644 index 6b46ce219488001aad369f8bf7ea07f44a8f0fee..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-lock.h +++ /dev/null @@ -1,243 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; 
indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. - * - */ - -#ifndef __LIBCFS_USER_LOCK_H__ -#define __LIBCFS_USER_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Implementations of portable synchronization APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. - * - * XXX Liang: There are several branches share lnet with b_hd_newconfig, - * if we define lock APIs at here, there will be conflict with liblustre - * in other branches. - */ - -#ifndef __KERNEL__ -#include <stdio.h> -#include <stdlib.h> - -#if 0 -/* - * Optional debugging (magic stamping and checking ownership) can be added. - */ - -/* - * spin_lock - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - * - * No-op implementation. 
- */ -struct spin_lock {int foo;}; - -typedef struct spin_lock spinlock_t; - -#define SPIN_LOCK_UNLOCKED (spinlock_t) { } -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) - -void spin_lock_init(spinlock_t *lock); -void spin_lock(spinlock_t *lock); -void spin_unlock(spinlock_t *lock); -int spin_trylock(spinlock_t *lock); -void spin_lock_bh_init(spinlock_t *lock); -void spin_lock_bh(spinlock_t *lock); -void spin_unlock_bh(spinlock_t *lock); -static inline int spin_is_locked(spinlock_t *l) {return 1;} - -static inline void spin_lock_irqsave(spinlock_t *l, unsigned long f){} -static inline void spin_unlock_irqrestore(spinlock_t *l, unsigned long f){} - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ -typedef struct semaphore { - int foo; -} mutex_t; - -void sema_init(struct semaphore *s, int val); -void __down(struct semaphore *s); -void __up(struct semaphore *s); - -/* - * Mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ -#define mutex_up(s) __up(s) -#define mutex_down(s) __down(s) - -#define init_mutex(x) sema_init(x, 1) -#define init_mutex_locked(x) sema_init(x, 0) - -/* - * Completion: - * - * - init_completion(c) - * - complete(c) - * - wait_for_completion(c) - */ -#if 0 -struct completion {}; - -void init_completion(struct completion *c); -void complete(struct completion *c); -void wait_for_completion(struct completion *c); -#endif - -/* - * rw_semaphore: - * - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ -struct rw_semaphore {}; - -void init_rwsem(struct rw_semaphore *s); -void down_read(struct rw_semaphore *s); -int down_read_trylock(struct rw_semaphore *s); -void down_write(struct rw_semaphore *s); -int down_write_trylock(struct rw_semaphore *s); -void up_read(struct rw_semaphore *s); -void up_write(struct rw_semaphore *s); - -/* - * read-write lock : Need to be investigated more!! 
- * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore - * - * - DECLARE_RWLOCK(l) - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ -typedef struct rw_semaphore rwlock_t; - -#define rwlock_init(pl) init_rwsem(pl) - -#define read_lock(l) down_read(l) -#define read_unlock(l) up_read(l) -#define write_lock(l) down_write(l) -#define write_unlock(l) up_write(l) - -static inline void -write_lock_irqsave(rwlock_t *l, unsigned long f) { write_lock(l); } -static inline void -write_unlock_irqrestore(rwlock_t *l, unsigned long f) { write_unlock(l); } - -static inline void -read_lock_irqsave(rwlock_t *l, unsigned long f) { read_lock(l); } -static inline void -read_unlock_irqrestore(rwlock_t *l, unsigned long f) { read_unlock(l); } - -/* - * Atomic for user-space - * Copied from liblustre - */ -typedef struct { volatile int counter; } atomic_t; - -#define ATOMIC_INIT(i) { (i) } -#define atomic_read(a) ((a)->counter) -#define atomic_set(a,b) do {(a)->counter = b; } while (0) -#define atomic_dec_and_test(a) ((--((a)->counter)) == 0) -#define atomic_inc(a) (((a)->counter)++) -#define atomic_dec(a) do { (a)->counter--; } while (0) -#define atomic_add(b,a) do {(a)->counter += b;} while (0) -#define atomic_add_return(n,a) ((a)->counter = n) -#define atomic_inc_return(a) atomic_add_return(1,a) -#define atomic_sub(b,a) do {(a)->counter -= b;} while (0) - -#endif - -#ifdef HAVE_LIBPTHREAD -#include <pthread.h> - -/* - * Completion - */ - -struct cfs_completion { - int c_done; - pthread_cond_t c_cond; - pthread_mutex_t c_mut; -}; - -void cfs_init_completion(struct cfs_completion *c); -void cfs_fini_completion(struct cfs_completion *c); -void cfs_complete(struct cfs_completion *c); -void cfs_wait_for_completion(struct cfs_completion *c); - -/* - * atomic.h - */ - -typedef struct { volatile int counter; } cfs_atomic_t; - -int cfs_atomic_read(cfs_atomic_t *a); -void cfs_atomic_set(cfs_atomic_t *a, int b); -int 
cfs_atomic_dec_and_test(cfs_atomic_t *a); -void cfs_atomic_inc(cfs_atomic_t *a); -void cfs_atomic_dec(cfs_atomic_t *a); -void cfs_atomic_add(int b, cfs_atomic_t *a); -void cfs_atomic_sub(int b, cfs_atomic_t *a); - -#endif /* HAVE_LIBPTHREAD */ - -/* !__KERNEL__ */ -#endif - -/* __LIBCFS_USER_LOCK_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/user-prim.h b/lnet/include/libcfs/user-prim.h deleted file mode 100644 index 43c1aeb4eb36895a2c2c2e05e9155648e9290b5d..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-prim.h +++ /dev/null @@ -1,328 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. - * - */ - -#ifndef __LIBCFS_USER_PRIM_H__ -#define __LIBCFS_USER_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -/* Implementations of portable APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. - */ - -#ifndef __KERNEL__ - -#include <stdlib.h> -#include <string.h> -#include <sys/signal.h> -#include <sys/mman.h> -#include <libcfs/list.h> -#include <libcfs/user-time.h> -#include <signal.h> -#include <stdlib.h> -#include <unistd.h> - -#ifdef HAVE_LIBPTHREAD -#include <pthread.h> -#endif - - -/* - * Wait Queue. No-op implementation. - */ - -typedef struct cfs_waitlink { - struct list_head sleeping; - void *process; -} cfs_waitlink_t; - -typedef struct cfs_waitq { - struct list_head sleepers; -} cfs_waitq_t; - -void cfs_waitq_init(struct cfs_waitq *waitq); -void cfs_waitlink_init(struct cfs_waitlink *link); -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link); -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); -int cfs_waitq_active(struct cfs_waitq *waitq); -void cfs_waitq_signal(struct cfs_waitq *waitq); -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); -void cfs_waitq_broadcast(struct cfs_waitq *waitq); -void cfs_waitq_wait(struct cfs_waitlink *link, int state); -int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout); -#define cfs_schedule_timeout(s, t) \ - do { \ - cfs_waitlink_t l; \ - cfs_waitq_timedwait(&l, s, t); \ - } while (0) - -#define CFS_TASK_INTERRUPTIBLE (0) -#define CFS_TASK_UNINT (0) - -/* 2.4 defines */ - -/* XXX - * for this moment, liblusre will not rely OST for non-page-aligned write - */ -#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE - -struct page { - void *addr; - unsigned long index; - struct list_head list; - unsigned long private; - - /* internally used by liblustre file i/o */ - int _offset; - int _count; -#ifdef 
LIBLUSTRE_HANDLE_UNALIGNED_PAGE - int _managed; -#endif - struct list_head _node; -}; - -typedef struct page cfs_page_t; - -#ifndef PAGE_SIZE - -/* 4K */ -#define CFS_PAGE_SHIFT 12 -#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT) -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) - -#else - -#define CFS_PAGE_SIZE PAGE_SIZE -#define CFS_PAGE_SHIFT PAGE_SHIFT -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) - -#endif - -cfs_page_t *cfs_alloc_page(unsigned int flags); -void cfs_free_page(cfs_page_t *pg); -void *cfs_page_address(cfs_page_t *pg); -void *cfs_kmap(cfs_page_t *pg); -void cfs_kunmap(cfs_page_t *pg); - -#define cfs_get_page(p) __I_should_not_be_called__(at_all) -#define cfs_page_count(p) __I_should_not_be_called__(at_all) -#define cfs_page_index(p) ((p)->index) - -/* - * Memory allocator - * Inline function, so utils can use them without linking of libcfs - */ -#define __ALLOC_ZERO (1 << 2) -static inline void *cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - void *result; - - result = malloc(nr_bytes); - if (result != NULL && (flags & __ALLOC_ZERO)) - memset(result, 0, nr_bytes); - return result; -} - -#define cfs_free(addr) free(addr) -#define cfs_alloc_large(nr_bytes) cfs_alloc(nr_bytes, 0) -#define cfs_free_large(addr) cfs_free(addr) - -#define CFS_ALLOC_ATOMIC_TRY (0) -/* - * SLAB allocator - */ -typedef struct { - int size; -} cfs_mem_cache_t; - -#define SLAB_HWCACHE_ALIGN 0 -#define SLAB_KERNEL 0 -#define SLAB_NOFS 0 - -cfs_mem_cache_t * -cfs_mem_cache_create(const char *, size_t, size_t, unsigned long); -int cfs_mem_cache_destroy(cfs_mem_cache_t *c); -void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp); -void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr); - -typedef int (cfs_read_proc_t)(char *page, char **start, off_t off, - int count, int *eof, void *data); - -struct file; /* forward ref */ -typedef int (cfs_write_proc_t)(struct file *file, const char *buffer, - unsigned long count, void *data); - -/* - * Signal - */ -typedef sigset_t 
cfs_sigset_t; - -/* - * Timer - */ -#include <sys/time.h> - -typedef struct { - struct list_head tl_list; - void (*function)(unsigned long unused); - unsigned long data; - long expires; -} cfs_timer_t; - -#define cfs_init_timer(t) do {} while(0) -#define cfs_jiffies \ -({ \ - unsigned long _ret = 0; \ - struct timeval tv; \ - if (gettimeofday(&tv, NULL) == 0) \ - _ret = tv.tv_sec; \ - _ret; \ -}) - -static inline int cfs_timer_init(cfs_timer_t *l, void (* func)(unsigned long), void *arg) -{ - CFS_INIT_LIST_HEAD(&l->tl_list); - l->function = func; - l->data = (unsigned long)arg; - return 0; -} - -static inline int cfs_timer_is_armed(cfs_timer_t *l) -{ - if (cfs_time_before(cfs_jiffies, l->expires)) - return 1; - else - return 0; -} - -static inline void cfs_timer_arm(cfs_timer_t *l, int thetime) -{ - l->expires = thetime; -} - -static inline void cfs_timer_disarm(cfs_timer_t *l) -{ -} - -static inline long cfs_timer_deadline(cfs_timer_t *l) -{ - return l->expires; -} - -#if 0 -#define cfs_init_timer(t) do {} while(0) -void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg); -void cfs_timer_done(struct cfs_timer *t); -void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline); -void cfs_timer_disarm(struct cfs_timer *t); -int cfs_timer_is_armed(struct cfs_timer *t); - -cfs_time_t cfs_timer_deadline(struct cfs_timer *t); -#endif - -#define in_interrupt() (0) - -static inline void cfs_pause(cfs_duration_t d) -{ - struct timespec s; - - cfs_duration_nsec(d, &s); - nanosleep(&s, NULL); -} - -typedef void cfs_psdev_t; - -static inline int cfs_psdev_register(cfs_psdev_t *foo) -{ - return 0; -} - -static inline int cfs_psdev_deregister(cfs_psdev_t *foo) -{ - return 0; -} - -#define cfs_lock_kernel() do {} while (0) -#define cfs_sigfillset(l) do {} while (0) -#define cfs_recalc_sigpending(l) do {} while (0) -#define cfs_kernel_thread(l,m,n) LBUG() - -#ifdef HAVE_LIBPTHREAD -typedef int (*cfs_thread_t)(void *); -int cfs_create_thread(cfs_thread_t 
func, void *arg); -#else -#define cfs_create_thread(l,m) LBUG() -#endif - -int cfs_parse_int_tunable(int *value, char *name); -uid_t cfs_curproc_uid(void); - -#define LIBCFS_REALLOC(ptr, size) realloc(ptr, size) - -#define cfs_online_cpus() sysconf(_SC_NPROCESSORS_ONLN) - -// static inline void local_irq_save(unsigned long flag) {return;} -// static inline void local_irq_restore(unsigned long flag) {return;} - -enum { - CFS_STACK_TRACE_DEPTH = 16 -}; - -struct cfs_stack_trace { - void *frame[CFS_STACK_TRACE_DEPTH]; -}; - -/* - * arithmetic - */ -#define do_div(a,b) \ - ({ \ - unsigned long remainder;\ - remainder = (a) % (b); \ - (a) = (a) / (b); \ - (remainder); \ - }) - -/* !__KERNEL__ */ -#endif - -/* __LIBCFS_USER_PRIM_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/user-tcpip.h b/lnet/include/libcfs/user-tcpip.h deleted file mode 100644 index 342c03997155076867b88f1555fad47a72f65b4a..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-tcpip.h +++ /dev/null @@ -1,90 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __LIBCFS_USER_TCPIP_H__ -#define __LIBCFS_USER_TCPIP_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifndef __KERNEL__ - -#include <sys/uio.h> - -/* - * Functions to get network interfaces info - */ - -int libcfs_sock_ioctl(int cmd, unsigned long arg); -int libcfs_ipif_query (char *name, int *up, __u32 *ip); -void libcfs_ipif_free_enumeration (char **names, int n); -int libcfs_ipif_enumerate (char ***namesp); - -/* - * Network function used by user-land lnet acceptor - */ - -int libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog); -int libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port); -int libcfs_sock_read (int sock, void *buffer, int nob, int timeout); -void libcfs_sock_abort_accept(__u16 port); - -/* - * Network functions of common use - */ - -int libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p); -int libcfs_socketpair(int *fdp); -int libcfs_fcntl_nonblock(int fd); -int libcfs_sock_set_nagle(int fd, int nagle); -int libcfs_sock_set_bufsiz(int fd, int bufsiz); -int libcfs_sock_create(int *fdp); -int libcfs_sock_bind_to_port(int fd, __u16 port); -int libcfs_sock_connect(int fd, __u32 ip, __u16 port); -int libcfs_sock_writev(int fd, const struct iovec *vector, int count); -int libcfs_sock_readv(int fd, const struct iovec *vector, int count); - -/* - * Macros for easy printing IP-adresses - */ - -#define NIPQUAD(addr) \ - ((unsigned char *)&addr)[0], \ - ((unsigned char *)&addr)[1], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char *)&addr)[3] - -#if defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN) -#define HIPQUAD(addr) \ - ((unsigned char *)&addr)[3], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char 
*)&addr)[1], \ - ((unsigned char *)&addr)[0] -#elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) -#define HIPQUAD NIPQUAD -#else -#error "Undefined byteorder??" -#endif /* __LITTLE_ENDIAN */ - -#endif /* !__KERNEL__ */ - -#endif diff --git a/lnet/include/libcfs/user-time.h b/lnet/include/libcfs/user-time.h deleted file mode 100644 index 874b7da4fa100f110fe15ef26a60484f56dcf7fb..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-time.h +++ /dev/null @@ -1,205 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. - * - */ - -#ifndef __LIBCFS_USER_TIME_H__ -#define __LIBCFS_USER_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). 
- * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#ifndef __KERNEL__ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION 1000000 - -/* - * Liblustre. time(2) based implementation. 
- */ - -#include <sys/types.h> -#include <sys/time.h> -#include <time.h> - -typedef time_t cfs_fs_time_t; -typedef time_t cfs_time_t; -typedef long cfs_duration_t; - -static inline cfs_time_t cfs_time_current(void) -{ - return time(NULL); -} - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return seconds; -} - -static inline time_t cfs_time_current_sec(void) -{ - return cfs_time_seconds(cfs_time_current()); -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return t1 < t2; -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return t1 <= t2; -} - -static inline cfs_duration_t cfs_duration_build(int64_t nano) -{ - return (cfs_duration_t) (nano / ONE_BILLION); -} - -static inline time_t cfs_duration_sec(cfs_duration_t d) -{ - return d; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ - s->tv_sec = d; - s->tv_usec = 0; -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ - s->tv_sec = d; - s->tv_nsec = 0; -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - time(t); -} - -static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return *t; -} - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - v->tv_sec = *t; - v->tv_usec = 0; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - s->tv_sec = *t; - s->tv_nsec = 0; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return *t1 < *t2; -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return *t1 <= *t2; -} - -#define CFS_TICK (1) - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return t + d; -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return t1 - t2; -} - -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift 
-#define cfs_time_before_64 cfs_time_before -#define cfs_time_beforeq_64 cfs_time_beforeq - -#ifndef CFS_TIME_T -#define CFS_TIME_T "%u" -#endif - -#define CFS_DURATION_T "%ld" - -/* !__KERNEL__ */ -#endif - -/* __LIBCFS_USER_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/winnt/kp30.h b/lnet/include/libcfs/winnt/kp30.h deleted file mode 100644 index 779d8be68d7c00d298fd1b73e5b58c514852aabc..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/kp30.h +++ /dev/null @@ -1,157 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __LIBCFS_WINNT_KP30_H__ -#define __LIBCFS_WINNT_KP30_H__ - -#ifndef __LIBCFS_KP30_H__ -#error Do not #include this file directly. 
#include <libcfs/kp30.h> instead -#endif - -#include <libcfs/winnt/portals_compat25.h> -#include <lnet/types.h> - -#ifdef __KERNEL__ - -/* Module parameter support */ -#define CFS_MODULE_PARM(name, t, type, perm, desc) - -#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ - - -static inline void our_cond_resched() -{ - schedule_timeout(1i64); -} - -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ -#else -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif - -#error Need a winnt version of panic() -#define LIBCFS_PANIC(msg) KeBugCheckEx(msg, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL) -#error libcfs_register_panic_notifier() missing -#error libcfs_unregister_panic_notifier() missing - -#define cfs_work_struct_t WORK_QUEUE_ITEM -#define cfs_prepare_work(tq, routine, contex) -#define cfs_schedule_work(tq) -#define cfs_get_work_data(type,field,data) (data) - -/* ------------------------------------------------------------------- */ - -#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) (cfs_symbol_get(#x)) -#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x) - -#define PORTAL_MODULE_USE do{}while(0) -#define PORTAL_MODULE_UNUSE do{}while(0) - -#define printk DbgPrint -#define ptintf DbgPrint - -#else /* !__KERNEL__ */ - -# include <stdio.h> -# include <stdlib.h> -#ifdef __CYGWIN__ -# include <cygwin-ioctl.h> -#endif -# include <time.h> - -#endif /* End of !__KERNEL__ */ - -/******************************************************************************/ -/* Light-weight trace - * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 0 - -/* kernel hasn't defined this? 
*/ -typedef struct { - __s64 lwte_when; - char *lwte_where; - void *lwte_task; - long_ptr lwte_p1; - long_ptr lwte_p2; - long_ptr lwte_p3; - long_ptr lwte_p4; -# if BITS_PER_LONG > 32 - long_ptr lwte_pad; -# endif -} lwt_event_t; - - -# define LWT_EVENT(p1,p2,p3,p4) - - -/* ------------------------------------------------------------------ */ - -#define IOCTL_LIBCFS_TYPE long_ptr - -#ifdef __CYGWIN__ -# ifndef BITS_PER_LONG -# if (~0UL) == 0xffffffffUL -# define BITS_PER_LONG 32 -# else -# define BITS_PER_LONG 64 -# endif -# endif -#endif - -#if BITS_PER_LONG > 32 -# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) -# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a) -# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a) -#else -# define LI_POISON ((int)0x5a5a5a5a) -# define LL_POISON ((long_ptr)0x5a5a5a5a) -# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a) -#endif - -#if defined(__x86_64__) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%u" -# define LPSSZ "%d" -#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%u" -# define LPSSZ "%d" -#endif -#ifndef LPU64 -# error "No word size defined" -#endif - -#endif diff --git a/lnet/include/libcfs/winnt/libcfs.h b/lnet/include/libcfs/winnt/libcfs.h deleted file mode 100644 index 386eb5f9e0052e139475d3c54047b8ba0bbced53..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/libcfs.h +++ /dev/null @@ -1,126 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __LIBCFS_WINNT_LIBCFS_H__ -#define __LIBCFS_WINNT_LIBCFS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* workgroud for VC compiler */ -#ifndef __FUNCTION__ -#define __FUNCTION__ "generic" -#endif - -#include <libcfs/winnt/winnt-types.h> -#include <libcfs/portals_utils.h> -#include <libcfs/winnt/winnt-time.h> -#include <libcfs/winnt/winnt-lock.h> -#include <libcfs/winnt/winnt-mem.h> -#include <libcfs/winnt/winnt-prim.h> -#include <libcfs/winnt/winnt-fs.h> -#include <libcfs/winnt/winnt-tcpip.h> - -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - -#ifdef __KERNEL__ - -enum { - /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */ - CFS_STACK_TRACE_DEPTH = 16 -}; - -struct cfs_stack_trace { - void *frame[CFS_STACK_TRACE_DEPTH]; -}; - -static inline __u32 query_stack_size() -{ - ULONG LowLimit, HighLimit; - - IoGetStackLimits(&LowLimit, &HighLimit); - ASSERT(HighLimit > LowLimit); - - return (__u32) (HighLimit - LowLimit); -} -#else -static inline __u32 query_stack_size() -{ - return 4096; -} -#endif - - -#ifndef THREAD_SIZE -# define 
THREAD_SIZE query_stack_size() -#endif - -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#ifdef __KERNEL__ -# ifdef __ia64__ -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((ulong_ptr)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK (IoGetRemainingStackSize()) -# error "This doesn't seem right; CDEBUG_STACK should grow with the stack" -# endif /* __ia64__ */ - -#define CHECK_STACK() \ -do { \ - unsigned long _stack = CDEBUG_STACK(); \ - \ - if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \ - libcfs_stack = _stack; \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \ - __FILE__, NULL, __LINE__, \ - "maximum lustre stack %lu\n", _stack); \ - } \ -} while (0) -#else /* !__KERNEL__ */ -#define CHECK_STACK() do { } while(0) -#define CDEBUG_STACK() (0L) -#endif /* __KERNEL__ */ - -/* initial pid */ -#define LUSTRE_LNET_PID 12345 - -#define ENTRY_NESTING_SUPPORT (0) -#define ENTRY_NESTING do {;} while (0) -#define EXIT_NESTING do {;} while (0) -#define __current_nesting_level() (0) - -#endif /* _WINNT_LIBCFS_H */ diff --git a/lnet/include/libcfs/winnt/lltrace.h b/lnet/include/libcfs/winnt/lltrace.h deleted file mode 100644 index 9615e94e7c750d26233923a9d2b68c0ce81ee195..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/lltrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_LLTRACE_H__ -#define __LIBCFS_WINNT_LLTRACE_H__ - -#ifndef __LIBCFS_LLTRACE_H__ -#error Do not #include this file directly. #include <libcfs/lltrace.h> instead -#endif - - -#endif diff --git a/lnet/include/libcfs/winnt/portals_compat25.h b/lnet/include/libcfs/winnt/portals_compat25.h deleted file mode 100644 index 579b795c6550c3fcb837392b343411a156c58459..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/portals_compat25.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__ -#define __LIBCFS_WINNT_PORTALS_COMPAT_H__ - - - -#endif /* _PORTALS_COMPAT_H */ diff --git a/lnet/include/libcfs/winnt/portals_utils.h b/lnet/include/libcfs/winnt/portals_utils.h deleted file mode 100644 index ec806925cdef007a29f40fa2b09b86d99fe417c2..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/portals_utils.h +++ /dev/null @@ -1,168 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_PORTALS_UTILS_H__ -#define __LIBCFS_WINNT_PORTALS_UTILS_H__ - -#ifndef __LIBCFS_PORTALS_UTILS_H__ -#error Do not #include this file directly. 
#include <libcfs/portals_utils.h> instead -#endif - -#ifndef cfs_is_flag_set -#define cfs_is_flag_set(x,f) (((x)&(f))==(f)) -#endif - -#ifndef cfs_set_flag -#define cfs_set_flag(x,f) ((x) |= (f)) -#endif - -#ifndef cfs_clear_flag -#define cfs_clear_flag(x,f) ((x) &= ~(f)) -#endif - - -static inline __u32 __do_div(__u32 * n, __u32 b) -{ - __u32 mod; - - mod = *n % b; - *n = *n / b; - return mod; -} - -#define do_div(n,base) __do_div((__u32 *)&(n), (__u32) (base)) - -#ifdef __KERNEL__ - -#include <stdlib.h> -#include <libcfs/winnt/winnt-types.h> - -char * strsep(char **s, const char *ct); -static inline size_t strnlen(const char * s, size_t count) { - size_t len = 0; - while(len < count && s[len++]); - return len; -} -char * ul2dstr(ulong_ptr address, char *buf, int len); - -#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) -#define simple_strtoll(a1, a2, a3) (__s64)strtoull(a1, a2, a3) -#define simple_strtoull(a1, a2, a3) strtoull(a1, a2, a3) - -unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base); - -static inline int test_bit(int nr, void * addr) -{ - return ((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0; -} - -static inline void clear_bit(int nr, void * addr) -{ - (((volatile ULONG *) addr)[nr >> 5]) &= (~(1UL << (nr & 31))); -} - - -static inline void set_bit(int nr, void * addr) -{ - (((volatile ULONG *) addr)[nr >> 5]) |= (1UL << (nr & 31)); -} - -static inline void read_random(char *buf, int len) -{ - ULONG Seed = (ULONG) buf; - Seed = RtlRandom(&Seed); - while (len >0) { - if (len > sizeof(ULONG)) { - memcpy(buf, &Seed, sizeof(ULONG)); - len -= sizeof(ULONG); - buf += sizeof(ULONG); - } else { - memcpy(buf, &Seed, len); - len = 0; - break; - } - } -} -#define get_random_bytes(buf, len) read_random(buf, len) - -/* do NOT use function or expression as parameters ... */ - -#ifndef min_t -#define min_t(type,x,y) (type)(x) < (type)(y) ? (x): (y) -#endif - -#ifndef max_t -#define max_t(type,x,y) (type)(x) < (type)(y) ? 
(y): (x) -#endif - - -#define NIPQUAD(addr) \ - ((unsigned char *)&addr)[0], \ - ((unsigned char *)&addr)[1], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char *)&addr)[3] - -#define HIPQUAD(addr) \ - ((unsigned char *)&addr)[3], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char *)&addr)[1], \ - ((unsigned char *)&addr)[0] - -static int copy_from_user(void *to, void *from, int c) -{ - memcpy(to, from, c); - return 0; -} - -static int copy_to_user(void *to, void *from, int c) -{ - memcpy(to, from, c); - return 0; -} - - -#define put_user(x, ptr) \ -( \ - *(ptr) = x, \ - 0 \ -) - - -#define get_user(x,ptr) \ -( \ - x = *(ptr), \ - 0 \ -) - -#define num_physpages (64 * 1024) - -#define snprintf _snprintf -#define vsnprintf _vsnprintf - - -#endif /* !__KERNEL__ */ - -int cfs_error_code(NTSTATUS); - -#endif diff --git a/lnet/include/libcfs/winnt/winnt-fs.h b/lnet/include/libcfs/winnt/winnt-fs.h deleted file mode 100644 index 088d0e043d51292b4ebf216fec5759a5d10e71c9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-fs.h +++ /dev/null @@ -1,254 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * File operations & routines. - * - */ - -#ifndef __LIBCFS_WINNT_CFS_FS_H__ -#define __LIBCFS_WINNT_CFS_FS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - - -#define MINORBITS 8 -#define MINORMASK ((1U << MINORBITS) - 1) - -#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) -#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) -#define NODEV 0 -#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) - - -#ifdef __KERNEL__ - -struct file_operations -{ - loff_t (*lseek)(struct file * file, loff_t offset, int origin); - ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos); - ssize_t (*write)(struct file * file, const char * buffer, - size_t count, loff_t *ppos); - int (*ioctl) (struct file *, unsigned int, ulong_ptr); - int (*open) (struct file *); - int (*release) (struct file *); -}; - -struct file { - - cfs_handle_t f_handle; - unsigned int f_flags; - mode_t f_mode; - ulong_ptr f_count; - - //struct list_head f_list; - //struct dentry * f_dentry; - - cfs_proc_entry_t * proc_dentry; - cfs_file_operations_t * f_op; - - size_t f_size; - loff_t f_pos; - unsigned int f_uid, f_gid; - int f_error; - - ulong_ptr f_version; - - void * private_data; - - char f_name[1]; - -}; - -#define cfs_filp_size(f) ((f)->f_size) -#define cfs_filp_poff(f) (&(f)->f_pos) - -cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err); -int cfs_filp_close(cfs_file_t *fp); -int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos); -int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos); -int cfs_filp_fsync(cfs_file_t *fp); -int cfs_get_file(cfs_file_t *fp); -int cfs_put_file(cfs_file_t *fp); -int cfs_file_count(cfs_file_t *fp); - - - -/* - * CFS_FLOCK routines - */ - -typedef struct file_lock{ - int fl_type; - pid_t fl_pid; - size_t fl_len; - off_t fl_start; - off_t fl_end; -} cfs_flock_t; - -#define CFS_INT_LIMIT(x) (~((x)1 << 
(sizeof(x)*8 - 1))) -#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t) - -#define cfs_flock_type(fl) ((fl)->fl_type) -#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0) -#define cfs_flock_pid(fl) ((fl)->fl_pid) -#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0) -#define cfs_flock_start(fl) ((fl)->fl_start) -#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0) -#define cfs_flock_end(fl) ((fl)->fl_end) -#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0) - -#define ATTR_MODE 0x0001 -#define ATTR_UID 0x0002 -#define ATTR_GID 0x0004 -#define ATTR_SIZE 0x0008 -#define ATTR_ATIME 0x0010 -#define ATTR_MTIME 0x0020 -#define ATTR_CTIME 0x0040 -#define ATTR_ATIME_SET 0x0080 -#define ATTR_MTIME_SET 0x0100 -#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 0x0400 -#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -//#define ATTR_CTIME_SET 0x2000 -#define ATTR_BLOCKS 0x4000 -#define ATTR_KILL_SUID 0 -#define ATTR_KILL_SGID 0 - -#define in_group_p(x) (0) - -/* - * proc fs routines - */ - -int proc_init_fs(); -void proc_destroy_fs(); - - -/* - * misc - */ - -static inline void *ERR_PTR(long_ptr error) -{ - return (void *) error; -} - -static inline long_ptr PTR_ERR(const void *ptr) -{ - return (long_ptr) ptr; -} - -static inline long_ptr IS_ERR(const void *ptr) -{ - return (ulong_ptr)ptr > (ulong_ptr)-1000L; -} - -#else /* !__KERNEL__ */ - -#define CREATE_NEW 1 -#define CREATE_ALWAYS 2 -#define OPEN_EXISTING 3 -#define OPEN_ALWAYS 4 -#define TRUNCATE_EXISTING 5 - -#define SECTION_QUERY 0x0001 -#define SECTION_MAP_WRITE 0x0002 -#define SECTION_MAP_READ 0x0004 -#define SECTION_MAP_EXECUTE 0x0008 -#define SECTION_EXTEND_SIZE 0x0010 - -#define FILE_MAP_COPY SECTION_QUERY -#define FILE_MAP_WRITE SECTION_MAP_WRITE -#define FILE_MAP_READ SECTION_MAP_READ -#define 
FILE_MAP_ALL_ACCESS SECTION_ALL_ACCESS - - -NTSYSAPI -HANDLE -NTAPI -CreateFileA( - IN LPCSTR lpFileName, - IN DWORD dwDesiredAccess, - IN DWORD dwShareMode, - IN PVOID lpSecurityAttributes, - IN DWORD dwCreationDisposition, - IN DWORD dwFlagsAndAttributes, - IN HANDLE hTemplateFile - ); - -#define CreateFile CreateFileA - -NTSYSAPI -BOOL -NTAPI -CloseHandle( - IN OUT HANDLE hObject - ); - -NTSYSAPI -HANDLE -NTAPI -CreateFileMappingA( - IN HANDLE hFile, - IN PVOID lpFileMappingAttributes, - IN DWORD flProtect, - IN DWORD dwMaximumSizeHigh, - IN DWORD dwMaximumSizeLow, - IN LPCSTR lpName - ); -#define CreateFileMapping CreateFileMappingA - -NTSYSAPI -DWORD -NTAPI -GetFileSize( - IN HANDLE hFile, - OUT DWORD * lpFileSizeHigh - ); - -NTSYSAPI -PVOID -NTAPI -MapViewOfFile( - IN HANDLE hFileMappingObject, - IN DWORD dwDesiredAccess, - IN DWORD dwFileOffsetHigh, - IN DWORD dwFileOffsetLow, - IN SIZE_T dwNumberOfBytesToMap - ); - -NTSYSAPI -BOOL -NTAPI -UnmapViewOfFile( - IN PVOID lpBaseAddress - ); - -#endif /* __KERNEL__ */ - -typedef struct { - void *d; -} cfs_dentry_t; - - -#endif /* __LIBCFS_WINNT_CFS_FS_H__*/ diff --git a/lnet/include/libcfs/winnt/winnt-lock.h b/lnet/include/libcfs/winnt/winnt-lock.h deleted file mode 100644 index e0b9393eaa40c1ad7ef7ba6c39ec1d459349fb90..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-lock.h +++ /dev/null @@ -1,686 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_CFS_LOCK_H__ -#define __LIBCFS_WINNT_CFS_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - - -/* - * nt specific part ... - */ - - -/* atomic */ - -typedef struct { volatile int counter; } atomic_t; - -#define ATOMIC_INIT(i) { i } - -#define atomic_read(v) ((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) - -void FASTCALL atomic_add(int i, atomic_t *v); -void FASTCALL atomic_sub(int i, atomic_t *v); - -int FASTCALL atomic_sub_and_test(int i, atomic_t *v); - -void FASTCALL atomic_inc(atomic_t *v); -void FASTCALL atomic_dec(atomic_t *v); - -int FASTCALL atomic_dec_and_test(atomic_t *v); -int FASTCALL atomic_inc_and_test(atomic_t *v); - - -/* event */ - -typedef KEVENT event_t; - -/* - * cfs_init_event - * To initialize the event object - * - * Arguments: - * event: pointer to the event object - * type: Non Zero: SynchronizationEvent - * Zero: NotificationEvent - * status: the initial stats of the event - * Non Zero: signaled - * Zero: un-signaled - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ -static inline void - cfs_init_event(event_t *event, int type, int status) -{ - KeInitializeEvent( - event, - (type) ? SynchronizationEvent: NotificationEvent, - (status) ? 
TRUE : FALSE - ); -} - -/* - * cfs_wait_event - * To wait on an event to syncrhonize the process - * - * Arguments: - * event: pointer to the event object - * timeout: the timeout for waitting or 0 means infinite time. - * - * Return Value: - * Zero: waiting timeouts - * Non Zero: event signaled ... - * - * Notes: - * N/A - */ - -static inline int64_t -cfs_wait_event(event_t * event, int64_t timeout) -{ - NTSTATUS Status; - LARGE_INTEGER TimeOut; - - TimeOut.QuadPart = -1 * (10000000/HZ) * timeout; - - Status = KeWaitForSingleObject( - event, - Executive, - KernelMode, - FALSE, - (timeout != 0) ? (&TimeOut) : (NULL) - ); - - if (Status == STATUS_TIMEOUT) { - return 0; - } - - return TRUE; // signaled case -} - -/* - * cfs_wake_event - * To signal the event object - * - * Arguments: - * event: pointer to the event object - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline int -cfs_wake_event(event_t * event) -{ - return (KeSetEvent(event, 0, FALSE) != 0); -} - -/* - * cfs_clear_event - * To clear/reset the status of the event object - * - * Arguments: - * event: pointer to the event object - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void -cfs_clear_event(event_t * event) -{ - KeResetEvent(event); -} - - -/* - * IMPORTANT !!!!!!!! - * - * All locks' declaration are not guaranteed to be initialized, - * Althought some of they are initialized in Linux. All locks - * declared by CFS_DECL_* should be initialized explicitly. - */ - - -/* - * spin lock defintions / routines - */ - -/* - * Warning: - * - * for spinlock operations, try to grab nesting acquisition of - * spinlock will cause dead-lock in MP system and current irql - * overwritten for UP system. (UP system could allow nesting spin - * acqisition, because it's not spin at all just raising the irql.) 
- * - */ - -typedef struct spin_lock { - - KSPIN_LOCK lock; - KIRQL irql; - -} spinlock_t; - - -#define CFS_DECL_SPIN(name) spinlock_t name; -#define CFS_DECL_SPIN_EXTERN(name) extern spinlock_t name; - - -static inline void spin_lock_init(spinlock_t *lock) -{ - KeInitializeSpinLock(&(lock->lock)); -} - - -static inline void spin_lock(spinlock_t *lock) -{ - KeAcquireSpinLock(&(lock->lock), &(lock->irql)); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - KIRQL irql = lock->irql; - KeReleaseSpinLock(&(lock->lock), irql); -} - - -#define spin_lock_irqsave(lock, flags) do {(flags) = 0; spin_lock(lock);} while(0) -#define spin_unlock_irqrestore(lock, flags) do {spin_unlock(lock);} while(0) - - -/* There's no corresponding routine in windows kernel. - We must realize a light one of our own. But there's - no way to identify the system is MP build or UP build - on the runtime. We just uses a workaround for it. */ - -extern int MPSystem; - -static int spin_trylock(spinlock_t *lock) -{ - KIRQL Irql; - int rc = 0; - - ASSERT(lock != NULL); - - KeRaiseIrql(DISPATCH_LEVEL, &Irql); - - if (MPSystem) { - if (0 == (ulong_ptr)lock->lock) { -#if _X86_ - __asm { - mov edx, dword ptr [ebp + 8] - lock bts dword ptr[edx], 0 - jb lock_failed - mov rc, TRUE - lock_failed: - } -#else - KdBreakPoint(); -#endif - - } - } else { - rc = TRUE; - } - - if (rc) { - lock->irql = Irql; - } else { - KeLowerIrql(Irql); - } - - return rc; -} - -/* synchronization between cpus: it will disable all DPCs - kernel task scheduler on the CPU */ -#define spin_lock_bh(x) spin_lock(x) -#define spin_unlock_bh(x) spin_unlock(x) -#define spin_lock_bh_init(x) spin_lock_init(x) - -/* - * rw_semaphore (using ERESOURCE) - */ - - -typedef struct rw_semaphore { - ERESOURCE rwsem; -} rw_semaphore_t; - - -#define CFS_DECL_RWSEM(name) rw_semaphore_t name -#define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name - - -/* - * init_rwsem - * To initialize the the rw_semaphore_t structure - * - * Arguments: - 
* rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void init_rwsem(rw_semaphore_t *s) -{ - ExInitializeResourceLite(&s->rwsem); -} - - -/* - * fini_rwsem - * To finilize/destroy the the rw_semaphore_t structure - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * For winnt system, we need this routine to delete the ERESOURCE. - * Just define it NULL for other systems. - */ - -static inline void fini_rwsem(rw_semaphore_t *s) -{ - ExDeleteResourceLite(&s->rwsem); -} - -/* - * down_read - * To acquire read-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void down_read(struct rw_semaphore *s) -{ - ExAcquireResourceSharedLite(&s->rwsem, TRUE); -} - - -/* - * down_read_trylock - * To acquire read-lock of the rw_semahore without blocking - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * Zero: failed to acquire the read lock - * Non-Zero: succeeded to acquire the read lock - * - * Notes: - * This routine will return immediately without waiting. 
- */ - -static inline int down_read_trylock(struct rw_semaphore *s) -{ - return ExAcquireResourceSharedLite(&s->rwsem, FALSE); -} - - -/* - * down_write - * To acquire write-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void down_write(struct rw_semaphore *s) -{ - ExAcquireResourceExclusiveLite(&(s->rwsem), TRUE); -} - - -/* - * down_write_trylock - * To acquire write-lock of the rw_semahore without blocking - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * Zero: failed to acquire the write lock - * Non-Zero: succeeded to acquire the read lock - * - * Notes: - * This routine will return immediately without waiting. - */ - -static inline int down_write_trylock(struct rw_semaphore *s) -{ - return ExAcquireResourceExclusiveLite(&(s->rwsem), FALSE); -} - - -/* - * up_read - * To release read-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void up_read(struct rw_semaphore *s) -{ - ExReleaseResourceForThreadLite( - &(s->rwsem), - ExGetCurrentResourceThread()); -} - - -/* - * up_write - * To release write-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void up_write(struct rw_semaphore *s) -{ - ExReleaseResourceForThreadLite( - &(s->rwsem), - ExGetCurrentResourceThread()); -} - -/* - * rwlock_t (using sempahore) - * - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ - -typedef struct { - spinlock_t guard; - int count; -} rwlock_t; - -void rwlock_init(rwlock_t * rwlock); -void rwlock_fini(rwlock_t * rwlock); - -void read_lock(rwlock_t * rwlock); -void read_unlock(rwlock_t * rwlock); -void write_lock(rwlock_t * 
rwlock); -void write_unlock(rwlock_t * rwlock); - -#define write_lock_irqsave(l, f) do {f = 0; write_lock(l);} while(0) -#define write_unlock_irqrestore(l, f) do {write_unlock(l);} while(0) -#define read_lock_irqsave(l, f) do {f=0; read_lock(l);} while(0) -#define read_unlock_irqrestore(l, f) do {read_unlock(l);} while(0) - - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ - -typedef struct semaphore { - KSEMAPHORE sem; -} mutex_t; - -static inline void sema_init(struct semaphore *s, int val) -{ - KeInitializeSemaphore(&s->sem, val, val); -} - -static inline void __down(struct semaphore *s) -{ - KeWaitForSingleObject( &(s->sem), Executive, - KernelMode, FALSE, NULL ); - -} - -static inline void __up(struct semaphore *s) -{ - KeReleaseSemaphore(&s->sem, 0, 1, FALSE); -} - -/* - * mutex_t: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ - - -/* - * init_mutex - * To initialize a mutex_t structure - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void init_mutex(mutex_t *mutex) -{ - sema_init(mutex, 1); -} - - -/* - * mutex_down - * To acquire the mutex lock - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void mutex_down(mutex_t *mutex) -{ - __down(mutex); -} - - -/* - * mutex_up - * To release the mutex lock (acquired already) - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void mutex_up(mutex_t *mutex) -{ - __up(mutex); -} - - -/* - * init_mutex_locked - * To initialize the mutex as acquired state - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline init_mutex_locked(mutex_t *mutex) -{ - init_mutex(mutex); - mutex_down(mutex); -} - -/* - * 
completion - * - * - init_complition(c) - * - complete(c) - * - wait_for_completion(c) - */ - -struct completion { - event_t event; -}; - - -/* - * init_completion - * To initialize the completion object - * - * Arguments: - * c: pointer to the completion structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void init_completion(struct completion *c) -{ - cfs_init_event(&(c->event), 1, FALSE); -} - - -/* - * complete - * To complete/signal the completion object - * - * Arguments: - * c: pointer to the completion structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void complete(struct completion *c) -{ - cfs_wake_event(&(c->event)); -} - -/* - * wait_for_completion - * To wait on the completion object. If the event is signaled, - * this function will return to the call with the event un-singled. - * - * Arguments: - * c: pointer to the completion structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void wait_for_completion(struct completion *c) -{ - cfs_wait_event(&(c->event), 0); -} - -/* __KERNEL__ */ -#else - -#include "../user-lock.h" - -/* __KERNEL__ */ -#endif -#endif diff --git a/lnet/include/libcfs/winnt/winnt-mem.h b/lnet/include/libcfs/winnt/winnt-mem.h deleted file mode 100644 index b7f00a4165a1fa5051c21c31836726a162e69919..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-mem.h +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines of memory manipulation routines . - * - */ - -#ifndef __LIBCFS_WINNT_CFS_MEM_H__ -#define __LIBCFS_WINNT_CFS_MEM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - -#define CFS_PAGE_SIZE PAGE_SIZE -#define CFS_PAGE_SHIFT PAGE_SHIFT -#define CFS_PAGE_MASK (~(PAGE_SIZE - 1)) - -typedef struct cfs_page { - void * addr; - atomic_t count; -} cfs_page_t; - - -cfs_page_t *cfs_alloc_page(int flags); -void cfs_free_page(cfs_page_t *pg); - -static inline void *cfs_page_address(cfs_page_t *page) -{ - return page->addr; -} - -static inline void *cfs_kmap(cfs_page_t *page) -{ - return page->addr; -} - -static inline void cfs_kunmap(cfs_page_t *page) -{ - return; -} - -static inline void cfs_get_page(cfs_page_t *page) -{ - atomic_inc(&page->count); -} - -static inline void cfs_put_page(cfs_page_t *page) -{ - atomic_dec(&page->count); -} - -static inline int cfs_page_count(cfs_page_t *page) -{ - return atomic_read(&page->count); -} - -/* - * Memory allocator - */ - -#define CFS_ALLOC_ATOMIC_TRY (0) - -extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); -extern void cfs_free(void *addr); - -extern void *cfs_alloc_large(size_t nr_bytes); -extern void cfs_free_large(void *addr); - -/* - * SLAB allocator - */ - -#define SLAB_HWCACHE_ALIGN 0 - -/* The cache name is limited to 20 chars */ - -typedef struct cfs_mem_cache { - - char name[20]; - ulong_ptr flags; - NPAGED_LOOKASIDE_LIST npll; - -} cfs_mem_cache_t; - - -extern 
cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr); -extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); -extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); -extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); - - -/* - * Page allocator slabs - */ - -extern cfs_mem_cache_t *cfs_page_t_slab; -extern cfs_mem_cache_t *cfs_page_p_slab; - - -#define CFS_DECL_MMSPACE -#define CFS_MMSPACE_OPEN do {} while(0) -#define CFS_MMSPACE_CLOSE do {} while(0) - - -#define mb() do {} while(0) -#define rmb() mb() -#define wmb() mb() - - -/* __KERNEL__ */ -#endif - -#endif /* __WINNT_CFS_MEM_H__ */ diff --git a/lnet/include/libcfs/winnt/winnt-prim.h b/lnet/include/libcfs/winnt/winnt-prim.h deleted file mode 100644 index 3c8560b71c952f95e5aacf4404100ce68136840c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-prim.h +++ /dev/null @@ -1,1082 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_CFS_PRIM_H__ -#define __LIBCFS_WINNT_CFS_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - - -/* - * libcfs proc device object - */ - - -#define LUSTRE_PROC_DEVICE L"\\Device\\lproc" /* proc fs emulator device object */ -#define LUSTRE_PROC_SYMLNK L"\\DosDevices\\lproc" /* proc fs user-visible device */ - - -/* - * Device IO Control Code Definitions - */ - -#define FILE_DEVICE_LIBCFS ('LC') - -#define FILE_DEVICE_LIBCFS ('LC') - -#define FUNC_LIBCFS_VERSION 0x101 // get version of current libcfs -#define FUNC_LIBCFS_IOCTL 0x102 // Device i/o control to proc fs - - -#define IOCTL_LIBCFS_VERSION \ - CTL_CODE (FILE_DEVICE_LIBCFS, FUNC_LIBCFS_VERSION, METHOD_BUFFERED, FILE_ANY_ACCESS) -#define IOCTL_LIBCFS_ENTRY \ - CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL, METHOD_BUFFERED, FILE_ANY_ACCESS) - -#pragma pack(4) - -typedef struct _CFS_PROC_IOCTL { - - ULONG cmd; // ioctl command identifier - ULONG len; // length of data - - // UCHAR data[]; // content of the real ioctl - -} CFS_PROC_IOCTL, *PCFS_PROC_IOCTL; - -#pragma pack() - -#ifdef __KERNEL__ - -#include <libcfs/list.h> - -/* - * Symbol functions for libcfs - * - * OSX has no facility for use to register symbol. - * So we have to implement it. 
- */ -#define CFS_SYMBOL_LEN 64 - -struct cfs_symbol { - char name[CFS_SYMBOL_LEN]; - void *value; - int ref; - struct list_head sym_list; -}; - -extern int cfs_symbol_register(const char *, const void *); -extern void cfs_symbol_unregister(const char *); -extern void * cfs_symbol_get(const char *); -extern void cfs_symbol_put(const char *); -extern void cfs_symbol_clean(); - - - -typedef struct file_operations cfs_file_operations_t; -typedef struct file cfs_file_t; - -/* - * Pseudo device register - */ - -typedef struct -{ - int minor; - const char * name; - cfs_file_operations_t * fops; -} cfs_psdev_t; - -int cfs_psdev_register(cfs_psdev_t * psdev); -int cfs_psdev_deregister(cfs_psdev_t * psdev); - - -/* - * Proc emulator file system APIs - */ - -typedef int cfs_read_proc_t(char *page, char **start, off_t off, - int count, int *eof, void *data); -typedef int cfs_write_proc_t(struct file *file, const char *buffer, - ulong_ptr count, void *data); - -#define CFS_PROC_ENTRY_MAGIC 'CPEM' - -#define CFS_PROC_FLAG_DIRECTORY 0x00000001 // directory node -#define CFS_PROC_FLAG_ATTACHED 0x00000002 // node is attached to proc -#define CFS_PROC_FLAG_MISCDEV 0x00000004 // miscellaneous device - -typedef struct cfs_proc_entry -{ - ULONG magic; // Magic - ULONG flags; // Flags - - struct _dir_entry { // proc directory entry - PRTL_SPLAY_LINKS root; - }; - - struct _file_entry { // proc file / leaf entry - cfs_read_proc_t * read_proc; - cfs_write_proc_t * write_proc; - }; - - mode_t mode; - unsigned short nlink; - - - struct file_operations * proc_fops; - void * data; - - // proc_dir_entry ended. 
- - RTL_SPLAY_LINKS s_link; // splay link - - // - // Maximum length of proc entry name is 0x20 - // - - char name[0x20]; - -} cfs_proc_entry_t, cfs_proc_dir_entry_t; - -typedef cfs_proc_entry_t cfs_proc_dir_entry_t; - -#define PROC_BLOCK_SIZE PAGE_SIZE - -/* - * Sysctl register - */ - -typedef struct ctl_table cfs_sysctl_table_t; -typedef struct ctl_table_header cfs_sysctl_table_header_t; - - -typedef int ctl_handler ( - cfs_sysctl_table_t *table, - int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, - void **context ); - -typedef int proc_handler ( - cfs_sysctl_table_t *ctl, - int write, struct file * filp, - void *buffer, size_t *lenp ); - - -int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp); - -int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp); - -int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context); - - -/* - * System io control definitions - */ - -#define CTL_MAXNAME 10 - -#define CTL_ANY -1 /* Matches any name */ -#define CTL_NONE 0 - -enum -{ - CTL_KERN=1, /* General kernel info and control */ - CTL_VM=2, /* VM management */ - CTL_NET=3, /* Networking */ - CTL_PROC=4, /* Process info */ - CTL_FS=5, /* Filesystems */ - CTL_DEBUG=6, /* Debugging */ - CTL_DEV=7, /* Devices */ - CTL_BUS=8, /* Busses */ - CTL_ABI=9, /* Binary emulation */ - CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ -}; - -/* sysctl table definitons */ -struct ctl_table -{ - int ctl_name; - char *procname; - void *data; - int maxlen; - mode_t mode; - cfs_sysctl_table_t *child; - proc_handler *proc_handler; /* text formatting callback */ - ctl_handler *strategy; /* read / write callback functions */ - cfs_proc_entry_t *de; /* proc entry block */ - void *extra1; - void *extra2; -}; - - -/* the mantaner of the cfs_sysctl_table trees */ -struct ctl_table_header 
-{ - cfs_sysctl_table_t * ctl_table; - struct list_head ctl_entry; -}; - - -cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod, - cfs_proc_entry_t *parent); -void proc_free_entry(cfs_proc_entry_t *de); -void remove_proc_entry(char *name, cfs_proc_entry_t *entry); -cfs_proc_entry_t * search_proc_entry(char * name, - cfs_proc_entry_t * root ); - -#define cfs_create_proc_entry create_proc_entry -#define cfs_free_proc_entry proc_free_entry -#define cfs_remove_proc_entry remove_proc_entry - -#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a) -#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t, a) - - -/* - * declaration of proc kernel process routines - */ - -cfs_file_t * -lustre_open_file(char * filename); - -int -lustre_close_file(cfs_file_t * fh); - -int -lustre_do_ioctl( cfs_file_t * fh, - unsigned long cmd, - ulong_ptr arg ); - -int -lustre_ioctl_file( cfs_file_t * fh, - PCFS_PROC_IOCTL devctl); - -size_t -lustre_read_file( cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ); - -size_t -lustre_write_file( cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ); - -/* - * Wait Queue - */ - - -typedef int cfs_task_state_t; - -#define CFS_TASK_INTERRUPTIBLE 0x00000001 -#define CFS_TASK_UNINT 0x00000002 - - - -#define CFS_WAITQ_MAGIC 'CWQM' -#define CFS_WAITLINK_MAGIC 'CWLM' - -typedef struct cfs_waitq { - - unsigned int magic; - unsigned int flags; - - spinlock_t guard; - struct list_head waiters; - -} cfs_waitq_t; - - -typedef struct cfs_waitlink cfs_waitlink_t; - -#define CFS_WAITQ_CHANNELS (2) - -#define CFS_WAITQ_CHAN_NORMAL (0) -#define CFS_WAITQ_CHAN_FORWARD (1) - - - -typedef struct cfs_waitlink_channel { - struct list_head link; - cfs_waitq_t * waitq; - cfs_waitlink_t * waitl; -} cfs_waitlink_channel_t; - -struct cfs_waitlink { - - unsigned int magic; - int flags; - event_t * event; - atomic_t * hits; - - cfs_waitlink_channel_t waitq[CFS_WAITQ_CHANNELS]; -}; - -enum { - CFS_WAITQ_EXCLUSIVE = 1 -}; - 
-#define CFS_DECL_WAITQ(name) cfs_waitq_t name - - -void cfs_waitq_init(struct cfs_waitq *waitq); -void cfs_waitlink_init(struct cfs_waitlink *link); - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link); -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); -int cfs_waitq_active(struct cfs_waitq *waitq); - -void cfs_waitq_signal(struct cfs_waitq *waitq); -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); -void cfs_waitq_broadcast(struct cfs_waitq *waitq); - -void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state); -cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, - cfs_task_state_t state, cfs_duration_t timeout); - - - -/* Kernel thread */ - -typedef int (*cfs_thread_t) (void *arg); - -typedef struct _cfs_thread_context { - cfs_thread_t func; - void * arg; -} cfs_thread_context_t; - -int cfs_kernel_thread(int (*func)(void *), void *arg, int flag); - -/* - * thread creation flags from Linux, not used in winnt - */ -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ -#define CLONE_PID 0x00001000 /* set if pid shared */ -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#define CLONE_THREAD 0x00010000 /* Same thread group? 
*/ -#define CLONE_NEWNS 0x00020000 /* New namespace group? */ - -#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) - - -/* - * sigset ... - */ - -typedef sigset_t cfs_sigset_t; - -/* - * Task struct - */ - -#define MAX_SCHEDULE_TIMEOUT ((long_ptr)(~0UL>>12)) - - -#define NGROUPS 1 -#define CFS_CURPROC_COMM_MAX (16) -typedef struct task_sruct{ - mode_t umask; - - pid_t pid; - pid_t pgrp; - - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - - int ngroups; - gid_t groups[NGROUPS]; - cfs_kernel_cap_t cap_effective, - cap_inheritable, - cap_permitted; - - char comm[CFS_CURPROC_COMM_MAX]; - void * journal_info; -} cfs_task_t; - - -/* - * linux task struct emulator ... - */ - -#define TASKMAN_MAGIC 'TMAN' /* Task Manager */ -#define TASKSLT_MAGIC 'TSLT' /* Task Slot */ - -typedef struct _TASK_MAN { - - ULONG Magic; /* Magic and Flags */ - ULONG Flags; - - spinlock_t Lock; /* Protection lock */ - - cfs_mem_cache_t * slab; /* Memory slab for task slot */ - - ULONG NumOfTasks; /* Total tasks (threads) */ - LIST_ENTRY TaskList; /* List of task slots */ - -} TASK_MAN, *PTASK_MAN; - -typedef struct _TASK_SLOT { - - ULONG Magic; /* Magic and Flags */ - ULONG Flags; - - LIST_ENTRY Link; /* To be linked to TaskMan */ - - event_t Event; /* Schedule event */ - - HANDLE Pid; /* Process id */ - HANDLE Tid; /* Thread id */ - PETHREAD Tet; /* Pointer to ethread */ - - atomic_t count; /* refer count */ - atomic_t hits; /* times of waken event singaled */ - - KIRQL irql; /* irql for rwlock ... 
*/ - - cfs_task_t task; /* linux task part */ - -} TASK_SLOT, *PTASK_SLOT; - - -#define current cfs_current() -#define set_current_state(s) do {;} while (0) - -#define wait_event(wq, condition) \ -do { \ - cfs_waitlink_t __wait; \ - \ - cfs_waitlink_init(&__wait); \ - while (TRUE) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) { \ - break; \ - } \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while(0) - -#define wait_event_interruptible(wq, condition, __ret) \ -do { \ - cfs_waitlink_t __wait; \ - \ - __ret = 0; \ - cfs_waitlink_init(&__wait); \ - while (TRUE) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) { \ - break; \ - } \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while(0) - - -int init_task_manager(); -void cleanup_task_manager(); -cfs_task_t * cfs_current(); -int schedule_timeout(int64_t time); -int schedule(); -int wake_up_process(cfs_task_t * task); -#define cfs_schedule_timeout(state, time) schedule_timeout(time) -void sleep_on(cfs_waitq_t *waitq); - -#define CFS_DECL_JOURNAL_DATA -#define CFS_PUSH_JOURNAL do {;} while(0) -#define CFS_POP_JOURNAL do {;} while(0) - - -/* module related definitions */ - -#ifndef __exit -#define __exit -#endif -#ifndef __init -#define __init -#endif - -#define request_module(x) (0) - -#define EXPORT_SYMBOL(s) -#define MODULE_AUTHOR(s) -#define MODULE_DESCRIPTION(s) -#define MODULE_LICENSE(s) -#define MODULE_PARM(a, b) -#define MODULE_PARM_DESC(a, b) - -#define module_init(X) int __init module_##X() {return X();} -#define module_exit(X) void __exit module_##X() {X();} - -#define DECLARE_INIT(X) extern int __init module_##X(void) -#define DECLARE_EXIT(X) extern void __exit module_##X(void) - -#define MODULE_INIT(X) do { int rc = module_##X(); \ - if (rc) goto errorout; \ - } while(0) - -#define MODULE_EXIT(X) do { module_##X(); } while(0) - - -/* 
Module interfaces */ -#define cfs_module(name, version, init, fini) \ -module_init(init); \ -module_exit(fini) - - -/* - * Linux kernel version definition - */ - -#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) -#define LINUX_VERSION_CODE (2*100+6*10+7) - - -/* - * Signal - */ -#define SIGNAL_MASK_ASSERT() - -/* - * Timer - */ - -#define CFS_TIMER_FLAG_INITED 0x00000001 // Initialized already -#define CFS_TIMER_FLAG_TIMERED 0x00000002 // KeSetTimer is called - -typedef struct cfs_timer { - - KSPIN_LOCK Lock; - - ULONG Flags; - - KDPC Dpc; - KTIMER Timer; - - cfs_time_t deadline; - - void (*proc)(ulong_ptr); - void * arg; - -} cfs_timer_t; - - -typedef void (*timer_func_t)(ulong_ptr); - -#define cfs_init_timer(t) - -void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg); -void cfs_timer_done(cfs_timer_t *t); -void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline); -void cfs_timer_disarm(cfs_timer_t *t); -int cfs_timer_is_armed(cfs_timer_t *t); -cfs_time_t cfs_timer_deadline(cfs_timer_t *t); - - -/* deschedule for a bit... */ -static inline void cfs_pause(cfs_duration_t ticks) -{ - cfs_schedule_timeout(TASK_UNINTERRUPTIBLE, ticks); -} - - -static inline void cfs_enter_debugger(void) -{ -#if _X86_ - __asm int 3; -#else - KdBreakPoint(); -#endif -} - -/* - * libcfs globals initialization/cleanup - */ - -int -libcfs_arch_init(void); - -void -libcfs_arch_cleanup(void); - -/* - * SMP ... 
- */ - -#define SMP_CACHE_BYTES 128 -#define __cacheline_aligned -#define NR_CPUS (2) -#define smp_processor_id() KeGetCurrentProcessorNumber() -#define smp_num_cpus NR_CPUS -#define num_online_cpus() smp_num_cpus -#define smp_call_function(f, a, n, w) do {} while(0) - -/* - * Irp related - */ - -#define NR_IRQS 512 -#define in_interrupt() (0) - -/* - * printk flags - */ - -#define KERN_EMERG "<0>" /* system is unusable */ -#define KERN_ALERT "<1>" /* action must be taken immediately */ -#define KERN_CRIT "<2>" /* critical conditions */ -#define KERN_ERR "<3>" /* error conditions */ -#define KERN_WARNING "<4>" /* warning conditions */ -#define KERN_NOTICE "<5>" /* normal but significant condition */ -#define KERN_INFO "<6>" /* informational */ -#define KERN_DEBUG "<7>" /* debug-level messages */ - -/* - * Misc - */ - - -#define inter_module_get(n) cfs_symbol_get(n) -#define inter_module_put(n) cfs_symbol_put(n) - -#ifndef likely -#define likely(exp) (exp) -#endif -#ifndef unlikely -#define unlikely(exp) (exp) -#endif - -#define lock_kernel() do {} while(0) -#define unlock_kernel() do {} while(0) - -#define CAP_SYS_ADMIN 0 -#define CAP_SYS_ROOT 1 - -#define capable(a) (TRUE) - -#define USERMODEHELPER(path, argv, envp) (0) - - -#define local_irq_save(x) -#define local_irq_restore(x) - -#define cfs_assert ASSERT - -#define THREAD_NAME - -#else /* !__KERNEL__ */ - -#define PAGE_CACHE_SIZE PAGE_SIZE -#define PAGE_CACHE_MASK PAGE_MASK - -#define getpagesize() (PAGE_SIZE) - - -typedef struct { - int foo; -} pthread_mutex_t; - -typedef struct { - int foo; -} pthread_cond_t; - -#define pthread_mutex_init(x, y) do {} while(0) -#define pthread_cond_init(x, y) do {} while(0) - -#define pthread_mutex_lock(x) do {} while(0) -#define pthread_mutex_unlock(x) do {} while(0) - -#define pthread_cond_wait(x,y) do {} while(0) -#define pthread_cond_broadcast(x) do {} while(0) - -typedef struct file { - int foo; -} cfs_file_t; - -typedef struct cfs_proc_dir_entry{ - void *data; 
-}cfs_proc_dir_entry_t; - - - -#include "../user-prim.h" - -#include <sys/stat.h> -#include <sys/types.h> - -#define strcasecmp strcmp -#define strncasecmp strncmp -#define snprintf _snprintf -#define getpid() (0) - - -#define getpwuid(x) (NULL) -#define getgrgid(x) (NULL) - -int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev); - -int gethostname(char * name, int namelen); - -#define setlinebuf(x) do {} while(0) - - -NTSYSAPI VOID NTAPI DebugBreak(); - - -static inline void cfs_enter_debugger(void) -{ -#if _X86_ - __asm int 3; -#else - DebugBreak(); -#endif -} - -/* Maximum EA Information Length */ -#define EA_MAX_LENGTH (sizeof(FILE_FULL_EA_INFORMATION) + 15) - - -/* - * proc user mode routines - */ - -HANDLE cfs_proc_open (char * filename, int oflag); -int cfs_proc_close(HANDLE handle); -int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count); -int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count); -int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer); - - -/* - * Native API definitions - */ - -// -// Disk I/O Routines -// - -NTSYSAPI -NTSTATUS -NTAPI -NtReadFile(HANDLE FileHandle, - HANDLE Event OPTIONAL, - PIO_APC_ROUTINE ApcRoutine OPTIONAL, - PVOID ApcContext OPTIONAL, - PIO_STATUS_BLOCK IoStatusBlock, - PVOID Buffer, - ULONG Length, - PLARGE_INTEGER ByteOffset OPTIONAL, - PULONG Key OPTIONAL); - -NTSYSAPI -NTSTATUS -NTAPI -NtWriteFile(HANDLE FileHandle, - HANDLE Event OPTIONAL, - PIO_APC_ROUTINE ApcRoutine OPTIONAL, - PVOID ApcContext OPTIONAL, - PIO_STATUS_BLOCK IoStatusBlock, - PVOID Buffer, - ULONG Length, - PLARGE_INTEGER ByteOffset OPTIONAL, - PULONG Key OPTIONAL); - -NTSYSAPI -NTSTATUS -NTAPI -NtClose(HANDLE Handle); - -NTSYSAPI -NTSTATUS -NTAPI -NtCreateFile(PHANDLE FileHandle, - ACCESS_MASK DesiredAccess, - POBJECT_ATTRIBUTES ObjectAttributes, - PIO_STATUS_BLOCK IoStatusBlock, - PLARGE_INTEGER AllocationSize OPTIONAL, - ULONG FileAttributes, - ULONG ShareAccess, - ULONG CreateDisposition, - ULONG CreateOptions, - 
PVOID EaBuffer OPTIONAL, - ULONG EaLength); - - -NTSYSAPI -NTSTATUS -NTAPI -NtDeviceIoControlFile( - IN HANDLE FileHandle, - IN HANDLE Event, - IN PIO_APC_ROUTINE ApcRoutine, - IN PVOID ApcContext, - OUT PIO_STATUS_BLOCK IoStatusBlock, - IN ULONG IoControlCode, - IN PVOID InputBuffer, - IN ULONG InputBufferLength, - OUT PVOID OutputBuffer, - OUT ULONG OutputBufferLength - ); - -NTSYSAPI -NTSTATUS -NTAPI -NtFsControlFile( - IN HANDLE FileHandle, - IN HANDLE Event OPTIONAL, - IN PIO_APC_ROUTINE ApcRoutine OPTIONAL, - IN PVOID ApcContext OPTIONAL, - OUT PIO_STATUS_BLOCK IoStatusBlock, - IN ULONG FsControlCode, - IN PVOID InputBuffer OPTIONAL, - IN ULONG InputBufferLength, - OUT PVOID OutputBuffer OPTIONAL, - IN ULONG OutputBufferLength -); - - -NTSYSAPI -NTSTATUS -NTAPI -NtQueryInformationFile( - IN HANDLE FileHandle, - OUT PIO_STATUS_BLOCK IoStatusBlock, - OUT PVOID FileInformation, - IN ULONG Length, - IN FILE_INFORMATION_CLASS FileInformationClass - ); - -// -// Random routines ... -// - -NTSYSAPI -ULONG -NTAPI -RtlRandom( - IN OUT PULONG Seed - ); - -#endif /* __KERNEL__ */ - - -// -// Inode flags (Linux uses octad number, but why ? strange!!!) 
-// - -#undef S_IFMT -#undef S_IFDIR -#undef S_IFCHR -#undef S_IFREG -#undef S_IREAD -#undef S_IWRITE -#undef S_IEXEC - -#define S_IFMT 0x0F000 /* 017 0000 */ -#define S_IFSOCK 0x0C000 /* 014 0000 */ -#define S_IFLNK 0x0A000 /* 012 0000 */ -#define S_IFREG 0x08000 /* 010 0000 */ -#define S_IFBLK 0x06000 /* 006 0000 */ -#define S_IFDIR 0x04000 /* 004 0000 */ -#define S_IFCHR 0x02000 /* 002 0000 */ -#define S_IFIFO 0x01000 /* 001 0000 */ -#define S_ISUID 0x00800 /* 000 4000 */ -#define S_ISGID 0x00400 /* 000 2000 */ -#define S_ISVTX 0x00200 /* 000 1000 */ - -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) -#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) -#define S_ISFIL(m) (((m) & S_IFMT) == S_IFFIL) -#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) -#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) -#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) -#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) - -#define S_IPERMISSION_MASK 0x1FF /* */ - -#define S_IRWXU 0x1C0 /* 0 0700 */ -#define S_IRUSR 0x100 /* 0 0400 */ -#define S_IWUSR 0x080 /* 0 0200 */ -#define S_IXUSR 0x040 /* 0 0100 */ - -#define S_IRWXG 0x038 /* 0 0070 */ -#define S_IRGRP 0x020 /* 0 0040 */ -#define S_IWGRP 0x010 /* 0 0020 */ -#define S_IXGRP 0x008 /* 0 0010 */ - -#define S_IRWXO 0x007 /* 0 0007 */ -#define S_IROTH 0x004 /* 0 0004 */ -#define S_IWOTH 0x002 /* 0 0002 */ -#define S_IXOTH 0x001 /* 0 0001 */ - -#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) -#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) -#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH) -#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) -#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) - -/* - * linux ioctl coding definitions - */ - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 14 -#define _IOC_DIRBITS 2 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - 
-#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits. - */ -#define _IOC_NONE 0U -#define _IOC_WRITE 1U -#define _IOC_READ 2U - -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode ioctl numbers.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* - * Io vector ... - */ - -struct iovec -{ - void *iov_base; - size_t iov_len; -}; - - -#define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF)) -/* - * Convert a string to an unsigned long long integer. - * - * Ignores `locale' stuff. Assumes that the upper and lower case - * alphabets and digits are each contiguous. - */ -static inline __u64 -strtoull( - char *nptr, - char **endptr, - int base) -{ - char *s = nptr; - __u64 acc, cutoff; - int c, neg = 0, any, cutlim; - - /* - * See strtol for comments as to the logic used. - */ - do { - c = *s++; - } while (isspace(c)); - if (c == '-') { - neg = 1; - c = *s++; - } else if (c == '+') - c = *s++; - if ((base == 0 || base == 16) && - c == '0' && (*s == 'x' || *s == 'X')) { - c = s[1]; - s += 2; - base = 16; - } - if (base == 0) - base = c == '0' ? 
8 : 10; - cutoff = (__u64)ULONG_LONG_MAX / (__u64)base; - cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base); - for (acc = 0, any = 0;; c = *s++) { - if (isdigit(c)) - c -= '0'; - else if (isalpha(c)) - c -= isupper(c) ? 'A' - 10 : 'a' - 10; - else - break; - if (c >= base) - break; - if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) - any = -1; - else { - any = 1; - acc *= base; - acc += c; - } - } - if (any < 0) { - acc = ULONG_LONG_MAX; - } else if (neg) - acc = 0 - acc; - if (endptr != 0) - *endptr = (char *) (any ? s - 1 : nptr); - return (acc); -} - -#endif diff --git a/lnet/include/libcfs/winnt/winnt-tcpip.h b/lnet/include/libcfs/winnt/winnt-tcpip.h deleted file mode 100644 index a988247c6b5bf9be9fe51c25380714809a2180e0..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-tcpip.h +++ /dev/null @@ -1,660 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for Winnt (kernel and user-level). - * - */ - -#ifndef __LIBCFS_WINNT_TCPIP_H__ -#define __LIBCFS_WINNT_TCPIP_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - - -#ifdef __KERNEL__ - -// -// ks definitions -// - -// iovec is defined in libcfs: winnt_prim.h -// lnetkiov_t is defined in lnet/types.h - -typedef struct socket ksock_tconn_t; -typedef struct socket cfs_socket_t; - -// completion notification callback routine - -typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr); - -/* completion routine to update tx structure for async sending */ -typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr); - -// -// tdinal definitions -// - - -#if TDI_LIBCFS_DBG -#define KsPrint(X) KsPrintf X -#else -#define KsPrint(X) -#endif - - -// -// Socket Addresses Related ... -// - -#define INADDR_ANY (ULONG)0x00000000 -#define INADDR_LOOPBACK (ULONG)0x7f000001 -#define INADDR_BROADCAST (ULONG)0xffffffff -#define INADDR_NONE (ULONG)0xffffffff - -/* - * TCP / IP options - */ - -#define SOL_TCP 6 -#define SOL_UDP 17 - - -#define TL_INSTANCE 0 - -#define TCP_SOCKET_NODELAY 1 // disabling "Nagle" -#define TCP_SOCKET_KEEPALIVE 2 -#define TCP_SOCKET_OOBINLINE 3 -#define TCP_SOCKET_BSDURGENT 4 -#define TCP_SOCKET_ATMARK 5 -#define TCP_SOCKET_WINDOW 6 - - -/* Flags we can use with send/ and recv. - Added those for 1003.1g not all are supported yet - */ - -#define MSG_OOB 1 -#define MSG_PEEK 2 -#define MSG_DONTROUTE 4 -#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ -#define MSG_CTRUNC 8 -#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. 
for MTU */ -#define MSG_TRUNC 0x20 -#define MSG_DONTWAIT 0x40 /* Nonblocking io */ -#define MSG_EOR 0x80 /* End of record */ -#define MSG_WAITALL 0x100 /* Wait for a full request */ -#define MSG_FIN 0x200 -#define MSG_SYN 0x400 -#define MSG_CONFIRM 0x800 /* Confirm path validity */ -#define MSG_RST 0x1000 -#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ -#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ -#define MSG_MORE 0x8000 /* Sender will send more */ - -#define MSG_EOF MSG_FIN - - -// -// Maximum TRANSPORT_ADDRESS Length -// -// it must >= FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) -// + TDI_ADDRESS_LENGTH_IP -// -// I define it a little large and 16 bytes aligned to avoid possible overflow. -// - -#define MAX_ADDRESS_LENGTH (0x30) - - -// -// Maximum Listers Children Sockets -// - -#define MAX_CHILD_LISTENERS (4) - -// -// Maximum EA Information Length -// - -#define EA_MAX_LENGTH ( sizeof(FILE_FULL_EA_INFORMATION) - 1 + \ - TDI_TRANSPORT_ADDRESS_LENGTH + 1 + \ - MAX_ADDRESS_LENGTH ) - - -#define UDP_DEVICE_NAME L"\\Device\\Udp" -#define TCP_DEVICE_NAME L"\\Device\\Tcp" - - -/* - * TSDU definitions - */ - -#define TDINAL_TSDU_DEFAULT_SIZE (0x10000) - -#define KS_TSDU_MAGIC 'KSTD' - -#define KS_TSDU_ATTACHED 0x00000001 // Attached to the socket receive tsdu list - -typedef struct _KS_TSDU { - - ULONG Magic; - ULONG Flags; - - struct list_head Link; - - ULONG TotalLength; // Total size of KS_TSDU - - ULONG StartOffset; // Start offset of the first Tsdu unit - ULONG LastOffset; // End offset of the last Tsdu unit - -/* - union { - KS_TSDU_DAT[]; - KS_TSDU_BUF[]; - KS_TSDU_MDL[]; - } -*/ - -} KS_TSDU, *PKS_TSDU; - -#define TSDU_TYPE_BUF ((USHORT)0x5401) -#define TSDU_TYPE_DAT ((USHORT)0x5402) -#define TSDU_TYPE_MDL ((USHORT)0x5403) - -#define KS_TSDU_BUF_RECEIVING 0x0001 -typedef struct _KS_TSDU_BUF { - - USHORT TsduType; - USHORT TsduFlags; - - ULONG DataLength; - ULONG StartOffset; - - PVOID UserBuffer; - -} KS_TSDU_BUF, 
*PKS_TSDU_BUF; - -#define KS_TSDU_DAT_RECEIVING 0x0001 - -typedef struct _KS_TSDU_DAT { - - USHORT TsduType; - USHORT TsduFlags; - - ULONG DataLength; - ULONG StartOffset; - - ULONG TotalLength; - - UCHAR Data[1]; - -} KS_TSDU_DAT, *PKS_TSDU_DAT; - -#define KS_DWORD_ALIGN(x) (((x) + 0x03) & (~(0x03))) -#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data))) - -typedef struct _KS_TSDU_MDL { - - USHORT TsduType; - USHORT TsduFlags; - - ULONG DataLength; - ULONG StartOffset; - - PMDL Mdl; - PVOID Descriptor; - -} KS_TSDU_MDL, *PKS_TSDU_MDL; - - -typedef struct _KS_TSDUMGR { - - struct list_head TsduList; - ULONG NumOfTsdu; - ULONG TotalBytes; - KEVENT Event; - -} KS_TSDUMGR, *PKS_TSDUMGR; - - -typedef struct _KS_CHAIN { - - KS_TSDUMGR Normal; - KS_TSDUMGR Expedited; - -} KS_CHAIN, *PKS_CHAIN; - - -#define TDINAL_SCHED_FACTOR (1) -#define CAN_BE_SCHED(Len, Limit) (Len >= ((Limit) >> TDINAL_SCHED_FACTOR)) - -// -// Handler Settings Indictor -// - -#define TDI_EVENT_MAXIMUM_HANDLER (TDI_EVENT_ERROR_EX + 1) - - -typedef struct _KS_EVENT_HANDLERS { - BOOLEAN IsActive[TDI_EVENT_MAXIMUM_HANDLER]; - PVOID Handler [TDI_EVENT_MAXIMUM_HANDLER]; -} KS_EVENT_HANDLERS, *PKS_EVENT_HANDLERS; - -#define SetEventHandler(ha, ht, hr) do { \ - ha.IsActive[ht] = TRUE; \ - ha.Handler[ht] = (PVOID) (hr); \ - } while(0) - -// -// KSock Internal Structures -// - -typedef struct _KS_ADDRESS { - - union { - TRANSPORT_ADDRESS Tdi; - UCHAR Pading[MAX_ADDRESS_LENGTH]; - }; - - HANDLE Handle; - PFILE_OBJECT FileObject; - -} KS_ADDRESS, *PKS_ADDRESS; - -// -// Structures for Disconnect Workitem -// - -typedef struct _KS_DISCONNECT_WORKITEM { - - WORK_QUEUE_ITEM WorkItem; // Workitem to perform disconnection - ksock_tconn_t * tconn; // tdi connecton - ULONG Flags; // connection broken/discnnection flags - KEVENT Event; // sync event - -} KS_DISCONNECT_WORKITEM, *PKS_DISCONNECT_WORKITEM; - - -typedef struct _KS_CONNECTION { - - HANDLE Handle; // Handle of the tdi 
connection - PFILE_OBJECT FileObject; // FileObject if the conn object - - PTRANSPORT_ADDRESS Remote; // the ConnectionInfo of this connection - PTDI_CONNECTION_INFORMATION ConnectionInfo; - - ULONG nagle; // Tcp options - -} KS_CONNECTION, *PKS_CONNECTION; - - -// -// type definitions -// - -typedef MDL ksock_mdl_t; -typedef UNICODE_STRING ksock_unicode_name_t; -typedef WORK_QUEUE_ITEM ksock_workitem_t; - - -typedef KS_CHAIN ksock_chain_t; -typedef KS_ADDRESS ksock_tdi_addr_t; -typedef KS_CONNECTION ksock_tconn_info_t; -typedef KS_DISCONNECT_WORKITEM ksock_disconnect_workitem_t; - - -// -// Structures for transmission done Workitem -// - -typedef struct _KS_TCPX_FINILIZE { - ksock_workitem_t item; - void * tx; -} ksock_tcpx_fini_t; - - -typedef struct ksock_backlogs { - - struct list_head list; /* list to link the backlog connections */ - int num; /* number of backlogs in the list */ - -} ksock_backlogs_t; - - -typedef struct ksock_daemon { - - ksock_tconn_t * tconn; /* the listener connection object */ - unsigned short nbacklogs; /* number of listening backlog conns */ - unsigned short port; /* listening port number */ - int shutdown; /* daemon threads is to exit */ - struct list_head list; /* to be attached into ksock_nal_data_t*/ - -} ksock_daemon_t ; - - -typedef enum { - - kstt_sender = 0, // normal sending connection type, it's active connection, while - // child tconn is for passive connection. - - kstt_listener, // listener daemon type, it just acts as a daemon, and it does - // not have real connection. It manages children tcons to accept - // or refuse the connecting request from remote peers. 
- - kstt_child, // accepted child connection type, it's parent must be Listener - kstt_lasttype -} ksock_tconn_type; - -typedef enum { - - ksts_uninited = 0, // tconn is just allocated (zero values), not initialized yet - - ksts_inited, // tconn structure initialized: so it now can be identified as - // a sender, listener or a child - - ksts_bind, // tconn is bound: the local address object (ip/port) is created. - // after being bound, we must call ksocknal_put_tconn to release - // the tconn objects, it's not safe just to free the memory of tconn. - - ksts_associated, // the connection object is created and associated with the address - // object. so it's ready for connection. only for child and sender. - - ksts_connecting, // only used by child tconn: in the ConnectEvent handler routine, - // it indicts the child tconn is busy to be connected to the peer. - - ksts_connected, // the connection is built already: for sender and child - - ksts_listening, // listener daemon is working, only for listener tconn - - ksts_disconnected, // disconnected by user - ksts_aborted, // un-exptected broken status - - ksts_last // total number of tconn statuses -} ksock_tconn_state; - -#define KS_TCONN_MAGIC 'KSTM' - -#define KS_TCONN_HANDLERS_SET 0x00000001 // Conection handlers are set. -#define KS_TCONN_DISCONNECT_BUSY 0x00010000 // Disconnect Workitem is queued ... -#define KS_TCONN_DESTROY_BUSY 0x00020000 // Destory Workitem is queued ... 
- -#define KS_TCONN_DAEMON_STARTED 0x00100000 // indict the daemon is started, - // only valid for listener - -struct socket { - - ulong_ptr kstc_magic; /* Magic & Flags */ - ulong_ptr kstc_flags; - - spinlock_t kstc_lock; /* serialise lock*/ - void * kstc_conn; /* ksock_conn_t */ - - ksock_tconn_type kstc_type; /* tdi connection Type */ - ksock_tconn_state kstc_state; /* tdi connection state flag */ - - ksock_unicode_name_t kstc_dev; /* tcp transport device name */ - - ksock_tdi_addr_t kstc_addr; /* local address handlers / Objects */ - - atomic_t kstc_refcount; /* reference count of ksock_tconn */ - - struct list_head kstc_list; /* linked to global ksocknal_data */ - - union { - - struct { - int nbacklog; /* total number of backlog tdi connections */ - ksock_backlogs_t kstc_listening; /* listeing backlog child connections */ - ksock_backlogs_t kstc_accepted; /* connected backlog child connections */ - event_t kstc_accept_event; /* Signaled by AcceptedHander, - ksocknal_wait_accpeted_conns waits on */ - event_t kstc_destroy_event; /* Signaled when accepted child is released */ - } listener; - - struct { - ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ - ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ - ksock_chain_t kstc_send; /* tsdu engine for data sending */ - - int kstc_queued; /* Attached to Parent->ChildList ... */ - int kstc_queueno; /* 0: Attached to Listening list - 1: Attached to Accepted list */ - - int kstc_busy; /* referred by ConnectEventCallback ? */ - int kstc_accepted; /* the connection is built ready ? 
*/ - - struct list_head kstc_link; /* linked to parent tdi connection */ - ksock_tconn_t * kstc_parent; /* pointers to it's listener parent */ - } child; - - struct { - ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ - ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ - ksock_chain_t kstc_send; /* tsdu engine for data sending */ - } sender; - }; - - ulong_ptr kstc_snd_wnd; /* Sending window size */ - ulong_ptr kstc_rcv_wnd; /* Recving window size */ - - ksock_workitem_t kstc_destroy; /* tconn destruction workitem */ - ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */ - - ksock_schedule_cb kstc_sched_cb; /* notification callback routine of completion */ - ksock_update_tx kstc_update_tx; /* aync sending callback to update tx */ -}; - -#define SOCK_WMEM_QUEUED(sock) (0) - -#define TDINAL_WINDOW_DEFAULT_SIZE (0x100000) - - -struct _KS_UDP_COMPLETION_CONTEXT; -struct _KS_TCP_COMPLETION_CONTEXT; - - -typedef -NTSTATUS -(*PKS_UDP_COMPLETION_ROUTINE) ( - IN PIRP Irp, - IN struct _KS_UDP_COMPLETION_CONTEXT - *UdpContext - ); - - -typedef -NTSTATUS -(*PKS_TCP_COMPLETION_ROUTINE) ( - IN PIRP Irp, - IN struct _KS_TCP_COMPLETION_CONTEXT - *TcpContext - ); - -// -// Udp Irp Completion Context -// - -typedef struct _KS_UDP_COMPLETION_CONTEXT { - - PKEVENT Event; - union { - PFILE_OBJECT AddressObject; - ksock_tconn_t * tconn; - }; - - PKS_UDP_COMPLETION_ROUTINE CompletionRoutine; - PVOID CompletionContext; - -} KS_UDP_COMPLETION_CONTEXT, *PKS_UDP_COMPLETION_CONTEXT; - - -// -// Tcp Irp Completion Context (used by tcp data recv/send) -// - -typedef struct _KS_TCP_COMPLETION_CONTEXT { - - PKEVENT Event; // Event to be waited on by Irp caller ... 
- - ksock_tconn_t * tconn; // the tdi connection - - PKS_TCP_COMPLETION_ROUTINE CompletionRoutine; - PVOID CompletionContext; - PVOID CompletionContext2; - - PKS_TSDUMGR KsTsduMgr; // Tsdu buffer manager - - // - // These tow new members are for NON_BLOCKING transmission - // - - BOOLEAN bCounted; // To indict needing refcount to - // execute CompetionRoutine - ULONG ReferCount; // Refer count of this structure - -} KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT; - -typedef KS_TCP_COMPLETION_CONTEXT ksock_tdi_tx_t, ksock_tdi_rx_t; - - -/* - * tdi extensions - */ - -#define IOCTL_TCP_QUERY_INFORMATION_EX \ - CTL_CODE(FILE_DEVICE_NETWORK, 0, METHOD_NEITHER, FILE_ANY_ACCESS) -#define IOCTL_TCP_SET_INFORMATION_EX \ - CTL_CODE(FILE_DEVICE_NETWORK, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS) - - -#define TcpBuildSetInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, Buffer, BufferLen)\ - { \ - PIO_STACK_LOCATION _IRPSP; \ - if ( CompRoutine != NULL) { \ - IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\ - } else { \ - IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \ - } \ - _IRPSP = IoGetNextIrpStackLocation (Irp); \ - _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \ - _IRPSP->DeviceObject = DevObj; \ - _IRPSP->FileObject = FileObj; \ - _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = 0; \ - _IRPSP->Parameters.DeviceIoControl.InputBufferLength = BufferLen; \ - _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_SET_INFORMATION_EX; \ - Irp->AssociatedIrp.SystemBuffer = Buffer; \ - } - - -#define TcpBuildQueryInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, InBuffer, InLength, OutBuffer, OutLength)\ - { \ - PIO_STACK_LOCATION _IRPSP; \ - if ( CompRoutine != NULL) { \ - IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\ - } else { \ - IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \ - } \ - _IRPSP = IoGetNextIrpStackLocation (Irp); \ - _IRPSP->MajorFunction = 
IRP_MJ_DEVICE_CONTROL; \ - _IRPSP->DeviceObject = DevObj; \ - _IRPSP->FileObject = FileObj; \ - _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = OutLength; \ - _IRPSP->Parameters.DeviceIoControl.InputBufferLength = InLength; \ - _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_QUERY_INFORMATION_EX; \ - _IRPSP->Parameters.DeviceIoControl.Type3InputBuffer = InBuffer; \ - Irp->UserBuffer = OutBuffer; \ - } - - -typedef struct ks_addr_slot { - LIST_ENTRY link; - int up; - char iface[40]; - __u32 ip_addr; - __u32 netmask; - UNICODE_STRING devname; - WCHAR buffer[1]; -} ks_addr_slot_t; - -typedef struct { - - /* - * Tdi client information - */ - - UNICODE_STRING ksnd_client_name; /* tdi client module name */ - HANDLE ksnd_pnp_handle; /* the handle for pnp changes */ - - spinlock_t ksnd_addrs_lock; /* serialize ip address list access */ - LIST_ENTRY ksnd_addrs_list; /* list of the ip addresses */ - int ksnd_naddrs; /* number of the ip addresses */ - - /* - * Tdilnd internal defintions - */ - - int ksnd_init; /* initialisation state */ - - TDI_PROVIDER_INFO ksnd_provider; /* tdi tcp/ip provider's information */ - - spinlock_t ksnd_tconn_lock; /* tdi connections access serialise */ - - int ksnd_ntconns; /* number of tconns attached in list */ - struct list_head ksnd_tconns; /* tdi connections list */ - cfs_mem_cache_t * ksnd_tconn_slab; /* slabs for ksock_tconn_t allocations */ - event_t ksnd_tconn_exit; /* exit event to be signaled by the last tconn */ - - spinlock_t ksnd_tsdu_lock; /* tsdu access serialise */ - - int ksnd_ntsdus; /* number of tsdu buffers allocated */ - ulong_ptr ksnd_tsdu_size; /* the size of a signel tsdu buffer */ - cfs_mem_cache_t * ksnd_tsdu_slab; /* slab cache for tsdu buffer allocation */ - - int ksnd_nfreetsdus; /* number of tsdu buffers in the freed list */ - struct list_head ksnd_freetsdus; /* List of the freed Tsdu buffer. 
*/ - - spinlock_t ksnd_daemon_lock; /* stabilize daemon ops */ - int ksnd_ndaemons; /* number of listening daemons */ - struct list_head ksnd_daemons; /* listening daemon list */ - event_t ksnd_daemon_exit; /* the last daemon quiting should singal it */ - -} ks_data_t; - -int -ks_init_tdi_data(); - -void -ks_fini_tdi_data(); - - -#endif /* __KERNEL__ */ -#endif /* __LIBCFS_WINNT_TCPIP_H__ */ - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/winnt/winnt-time.h b/lnet/include/libcfs/winnt/winnt-time.h deleted file mode 100644 index a7a570cf4083f377867366b0678a33c717671241..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-time.h +++ /dev/null @@ -1,316 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for Winnt (kernel and user-level). - * - */ - -#ifndef __LIBCFS_WINNT_LINUX_TIME_H__ -#define __LIBCFS_WINNT_LINUX_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). - * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION ((u_int64_t) 1000000) - -#define HZ (100) - -struct timeval { - time_t tv_sec; /* seconds */ - suseconds_t tv_usec; /* microseconds */ -}; - -struct timespec { - ulong_ptr tv_sec; - ulong_ptr tv_nsec; -}; - -#ifdef __KERNEL__ - -#include <libcfs/winnt/portals_compat25.h> - -/* - * Generic kernel stuff - */ - -typedef struct timeval cfs_fs_time_t; - -typedef u_int64_t cfs_time_t; -typedef int64_t cfs_duration_t; - -static inline void do_gettimeofday(struct timeval *tv) -{ - LARGE_INTEGER Time; - - KeQuerySystemTime(&Time); - - tv->tv_sec = (long_ptr) 
(Time.QuadPart / 10000000); - tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; -} - -static inline cfs_time_t JIFFIES() -{ - LARGE_INTEGER Tick; - LARGE_INTEGER Elapse; - - KeQueryTickCount(&Tick); - - Elapse.QuadPart = Tick.QuadPart * KeQueryTimeIncrement(); - Elapse.QuadPart /= (10000000 / HZ); - - return Elapse.QuadPart; -} - -static inline cfs_time_t cfs_time_current(void) -{ - return JIFFIES(); -} - -static inline cfs_time_t cfs_time_current_sec(void) -{ - return (JIFFIES() / HZ); -} - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return (t + d); -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return (t1 - t2); -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return ((int64_t)t1 - (int64_t)t2) < 0; -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return ((int64_t)t1 - (int64_t)t2) <= 0; -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - ULONG Linux; - LARGE_INTEGER Sys; - - KeQuerySystemTime(&Sys); - - RtlTimeToSecondsSince1970(&Sys, &Linux); - - t->tv_sec = Linux; - t->tv_usec = (Sys.LowPart % 10000000) / 10; -} - -static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return t->tv_sec; -} - -static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return (__cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2)); -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2)); -} - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return (cfs_duration_t)seconds * HZ; -} - -static inline cfs_time_t cfs_duration_sec(cfs_duration_t d) -{ - return d / HZ; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ - s->tv_sec = (suseconds_t) (d / HZ); - 
s->tv_usec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * - ONE_MILLION / HZ); -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ - s->tv_sec = (suseconds_t) (d / HZ); - s->tv_nsec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * - ONE_BILLION / HZ); -} - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - *v = *t; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - s->tv_sec = t->tv_sec; - s->tv_nsec = t->tv_usec * 1000; -} - -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift -#define cfs_time_before_64 cfs_time_before -#define cfs_time_beforeq_64 cfs_time_beforeq - -/* - * One jiffy - */ -#define CFS_TICK (1) - -#define LTIME_S(t) (t) - -#define CFS_TIME_T "%I64u" -#define CFS_DURATION_T "%I64d" - -#else /* !__KERNEL__ */ - -/* - * Liblustre. time(2) based implementation. - */ -#include <libcfs/user-time.h> - - -// -// Time routines ... 
-// - -NTSYSAPI -CCHAR -NTAPI -NtQuerySystemTime( - OUT PLARGE_INTEGER CurrentTime - ); - - -NTSYSAPI -BOOLEAN -NTAPI -RtlTimeToSecondsSince1970( - IN PLARGE_INTEGER Time, - OUT PULONG ElapsedSeconds - ); - - -NTSYSAPI -VOID -NTAPI -RtlSecondsSince1970ToTime( - IN ULONG ElapsedSeconds, - OUT PLARGE_INTEGER Time - ); - -NTSYSAPI -VOID -NTAPI -Sleep( - DWORD dwMilliseconds // sleep time in milliseconds -); - - -static inline void sleep(int time) -{ - DWORD Time = 1000 * time; - Sleep(Time); -} - - -static inline void do_gettimeofday(struct timeval *tv) -{ - LARGE_INTEGER Time; - - NtQuerySystemTime(&Time); - - tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000); - tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; -} - -static inline int gettimeofday(struct timeval *tv, void * tz) -{ - do_gettimeofday(tv); - return 0; -} - -#endif /* __KERNEL__ */ - -/* __LIBCFS_LINUX_LINUX_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/winnt/winnt-types.h b/lnet/include/libcfs/winnt/winnt-types.h deleted file mode 100644 index b50b6bb6f2d5f31e46c0dca5b89af1512ba425cb..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-types.h +++ /dev/null @@ -1,647 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic types definitions - * - */ - -#ifndef _WINNT_TYPE_H -#define _WINNT_TYPE_H - -#ifdef __KERNEL__ - -#include <ntifs.h> -#include <windef.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> - -#include <tdi.h> -#include <tdikrnl.h> -#include <tdiinfo.h> - -#else - -#include <ntddk.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <time.h> -#include <io.h> -#include <string.h> -#include <assert.h> - -#endif - - -#define __LITTLE_ENDIAN - -#define inline __inline -#define __inline__ __inline - -typedef unsigned __int8 __u8; -typedef signed __int8 __s8; - -typedef signed __int64 __s64; -typedef unsigned __int64 __u64; - -typedef signed __int16 __s16; -typedef unsigned __int16 __u16; - -typedef signed __int32 __s32; -typedef unsigned __int32 __u32; - -typedef signed __int64 __s64; -typedef unsigned __int64 __u64; - -typedef unsigned long ULONG; - - -#if defined(_WIN64) - #define long_ptr __int64 - #define ulong_ptr unsigned __int64 - #define BITS_PER_LONG (64) -#else - #define long_ptr long - #define ulong_ptr unsigned long - #define BITS_PER_LONG (32) - -#endif - -/* bsd */ -typedef unsigned char u_char; -typedef unsigned short u_short; -typedef unsigned int u_int; -typedef unsigned long u_long; - -/* sysv */ -typedef unsigned char unchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -#ifndef __BIT_TYPES_DEFINED__ -#define __BIT_TYPES_DEFINED__ - -typedef __u8 u_int8_t; -typedef __s8 int8_t; -typedef __u16 u_int16_t; -typedef __s16 int16_t; -typedef __u32 u_int32_t; -typedef __s32 int32_t; - -#endif /* !(__BIT_TYPES_DEFINED__) */ - -typedef __u8 uint8_t; -typedef __u16 uint16_t; -typedef __u32 uint32_t; - -typedef __u64 
uint64_t; -typedef __u64 u_int64_t; -typedef __s64 int64_t; - -typedef long ssize_t; - -typedef __u32 suseconds_t; - -typedef __u32 pid_t, tid_t; - -typedef __u16 uid_t, gid_t; - -typedef __u16 mode_t; -typedef __u16 umode_t; - -typedef ulong_ptr sigset_t; - -typedef uint64_t loff_t; -typedef HANDLE cfs_handle_t; -typedef uint64_t cycles_t; - -#ifndef INVALID_HANDLE_VALUE -#define INVALID_HANDLE_VALUE ((HANDLE)-1) -#endif - - -#ifdef __KERNEL__ /* kernel */ - -typedef __u32 off_t; -typedef __u32 time_t; - -typedef unsigned short kdev_t; - -#else /* !__KERNEL__ */ - -typedef int BOOL; -typedef __u8 BYTE; -typedef __u16 WORD; -typedef __u32 DWORD; - -#endif /* __KERNEL__ */ - -/* - * Conastants suffix - */ - -#define ULL i64 -#define ull i64 - -/* - * Winnt kernel has no capabilities. - */ - -typedef __u32 cfs_kernel_cap_t; - -#define INT_MAX ((int)(~0U>>1)) -#define INT_MIN (-INT_MAX - 1) -#define UINT_MAX (~0U) - -#endif /* _WINNT_TYPES_H */ - - -/* - * Bytes order - */ - -// -// Byte order swapping routines -// - - -#define ___swab16(x) RtlUshortByteSwap(x) -#define ___swab32(x) RtlUlongByteSwap(x) -#define ___swab64(x) RtlUlonglongByteSwap(x) - -#define ___constant_swab16(x) \ - ((__u16)( \ - (((__u16)(x) & (__u16)0x00ffU) << 8) | \ - (((__u16)(x) & (__u16)0xff00U) >> 8) )) - -#define ___constant_swab32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ - (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ - (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ - (((__u32)(x) & (__u32)0xff000000UL) >> 24) )) - -#define ___constant_swab64(x) \ - ((__u64)( \ - (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \ - (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \ - (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) << 8) | \ - (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >> 8) | \ - (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \ - (__u64)(((__u64)(x) & 
(__u64)0x00ff000000000000UL) >> 40) | \ - (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) )) - - -#define __swab16(x) ___constant_swab16(x) -#define __swab32(x) ___constant_swab32(x) -#define __swab64(x) ___constant_swab64(x) - -#define __swab16s(x) do { *(x) = __swab16((USHORT)(*(x)));} while(0) -#define __swab32s(x) do { *(x) = __swab32((ULONG)(*(x)));} while(0) -#define __swab64s(x) do { *(x) = __swab64((ULONGLONG)(*(x)));} while(0) - -#define __constant_htonl(x) ___constant_swab32((x)) -#define __constant_ntohl(x) ___constant_swab32((x)) -#define __constant_htons(x) ___constant_swab16((x)) -#define __constant_ntohs(x) ___constant_swab16((x)) -#define __constant_cpu_to_le64(x) ((__u64)(x)) -#define __constant_le64_to_cpu(x) ((__u64)(x)) -#define __constant_cpu_to_le32(x) ((__u32)(x)) -#define __constant_le32_to_cpu(x) ((__u32)(x)) -#define __constant_cpu_to_le16(x) ((__u16)(x)) -#define __constant_le16_to_cpu(x) ((__u16)(x)) -#define __constant_cpu_to_be64(x) ___constant_swab64((x)) -#define __constant_be64_to_cpu(x) ___constant_swab64((x)) -#define __constant_cpu_to_be32(x) ___constant_swab32((x)) -#define __constant_be32_to_cpu(x) ___constant_swab32((x)) -#define __constant_cpu_to_be16(x) ___constant_swab16((x)) -#define __constant_be16_to_cpu(x) ___constant_swab16((x)) -#define __cpu_to_le64(x) ((__u64)(x)) -#define __le64_to_cpu(x) ((__u64)(x)) -#define __cpu_to_le32(x) ((__u32)(x)) -#define __le32_to_cpu(x) ((__u32)(x)) -#define __cpu_to_le16(x) ((__u16)(x)) -#define __le16_to_cpu(x) ((__u16)(x)) -#define __cpu_to_be64(x) __swab64((x)) -#define __be64_to_cpu(x) __swab64((x)) -#define __cpu_to_be32(x) __swab32((x)) -#define __be32_to_cpu(x) __swab32((x)) -#define __cpu_to_be16(x) __swab16((x)) -#define __be16_to_cpu(x) __swab16((x)) -#define __cpu_to_le64p(x) (*(__u64*)(x)) -#define __le64_to_cpup(x) (*(__u64*)(x)) -#define __cpu_to_le32p(x) (*(__u32*)(x)) -#define __le32_to_cpup(x) (*(__u32*)(x)) -#define __cpu_to_le16p(x) (*(__u16*)(x)) -#define 
__le16_to_cpup(x) (*(__u16*)(x)) -#define __cpu_to_be64p(x) __swab64p((x)) -#define __be64_to_cpup(x) __swab64p((x)) -#define __cpu_to_be32p(x) __swab32p((x)) -#define __be32_to_cpup(x) __swab32p((x)) -#define __cpu_to_be16p(x) __swab16p((x)) -#define __be16_to_cpup(x) __swab16p((x)) -#define __cpu_to_le64s(x) do {} while (0) -#define __le64_to_cpus(x) do {} while (0) -#define __cpu_to_le32s(x) do {} while (0) -#define __le32_to_cpus(x) do {} while (0) -#define __cpu_to_le16s(x) do {} while (0) -#define __le16_to_cpus(x) do {} while (0) -#define __cpu_to_be64s(x) __swab64s((x)) -#define __be64_to_cpus(x) __swab64s((x)) -#define __cpu_to_be32s(x) __swab32s((x)) -#define __be32_to_cpus(x) __swab32s((x)) -#define __cpu_to_be16s(x) __swab16s((x)) -#define __be16_to_cpus(x) __swab16s((x)) - -#ifndef cpu_to_le64 -#define cpu_to_le64 __cpu_to_le64 -#define le64_to_cpu __le64_to_cpu -#define cpu_to_le32 __cpu_to_le32 -#define le32_to_cpu __le32_to_cpu -#define cpu_to_le16 __cpu_to_le16 -#define le16_to_cpu __le16_to_cpu -#endif - -#define cpu_to_be64 __cpu_to_be64 -#define be64_to_cpu __be64_to_cpu -#define cpu_to_be32 __cpu_to_be32 -#define be32_to_cpu __be32_to_cpu -#define cpu_to_be16 __cpu_to_be16 -#define be16_to_cpu __be16_to_cpu -#define cpu_to_le64p __cpu_to_le64p -#define le64_to_cpup __le64_to_cpup -#define cpu_to_le32p __cpu_to_le32p -#define le32_to_cpup __le32_to_cpup -#define cpu_to_le16p __cpu_to_le16p -#define le16_to_cpup __le16_to_cpup -#define cpu_to_be64p __cpu_to_be64p -#define be64_to_cpup __be64_to_cpup -#define cpu_to_be32p __cpu_to_be32p -#define be32_to_cpup __be32_to_cpup -#define cpu_to_be16p __cpu_to_be16p -#define be16_to_cpup __be16_to_cpup -#define cpu_to_le64s __cpu_to_le64s -#define le64_to_cpus __le64_to_cpus -#define cpu_to_le32s __cpu_to_le32s -#define le32_to_cpus __le32_to_cpus -#define cpu_to_le16s __cpu_to_le16s -#define le16_to_cpus __le16_to_cpus -#define cpu_to_be64s __cpu_to_be64s -#define be64_to_cpus __be64_to_cpus -#define 
cpu_to_be32s __cpu_to_be32s -#define be32_to_cpus __be32_to_cpus -#define cpu_to_be16s __cpu_to_be16s -#define be16_to_cpus __be16_to_cpus - - -// -// Network to host byte swap functions -// - -#define ntohl(x) ( ( ( ( x ) & 0x000000ff ) << 24 ) | \ - ( ( ( x ) & 0x0000ff00 ) << 8 ) | \ - ( ( ( x ) & 0x00ff0000 ) >> 8 ) | \ - ( ( ( x ) & 0xff000000 ) >> 24 ) ) - -#define ntohs(x) ( ( ( ( x ) & 0xff00 ) >> 8 ) | \ - ( ( ( x ) & 0x00ff ) << 8 ) ) - - -#define htonl(x) ntohl(x) -#define htons(x) ntohs(x) - - - -#ifndef _I386_ERRNO_H -#define _I386_ERRNO_H - -#define EPERM 1 /* Operation not permitted */ -#define ENOENT 2 /* No such file or directory */ -#define ESRCH 3 /* No such process */ -#define EINTR 4 /* Interrupted system call */ -#define EIO 5 /* I/O error */ -#define ENXIO 6 /* No such device or address */ -#define E2BIG 7 /* Arg list too long */ -#define ENOEXEC 8 /* Exec format error */ -#define EBADF 9 /* Bad file number */ -#define ECHILD 10 /* No child processes */ -#define EAGAIN 11 /* Try again */ -#define ENOMEM 12 /* Out of memory */ -#define EACCES 13 /* Permission denied */ -#define EFAULT 14 /* Bad address */ -#define ENOTBLK 15 /* Block device required */ -#define EBUSY 16 /* Device or resource busy */ -#define EEXIST 17 /* File exists */ -#define EXDEV 18 /* Cross-device link */ -#define ENODEV 19 /* No such device */ -#define ENOTDIR 20 /* Not a directory */ -#define EISDIR 21 /* Is a directory */ -#define EINVAL 22 /* Invalid argument */ -#define ENFILE 23 /* File table overflow */ -#define EMFILE 24 /* Too many open files */ -#define ENOTTY 25 /* Not a typewriter */ -#define ETXTBSY 26 /* Text file busy */ -#define EFBIG 27 /* File too large */ -#define ENOSPC 28 /* No space left on device */ -#define ESPIPE 29 /* Illegal seek */ -#define EROFS 30 /* Read-only file system */ -#define EMLINK 31 /* Too many links */ -#define EPIPE 32 /* Broken pipe */ -#define EDOM 33 /* Math argument out of domain of func */ -#define ERANGE 34 /* Math result 
not representable */ -#undef EDEADLK -#define EDEADLK 35 /* Resource deadlock would occur */ -#undef ENAMETOOLONG -#define ENAMETOOLONG 36 /* File name too long */ -#undef ENOLCK -#define ENOLCK 37 /* No record locks available */ -#undef ENOSYS -#define ENOSYS 38 /* Function not implemented */ -#undef ENOTEMPTY -#define ENOTEMPTY 39 /* Directory not empty */ -#define ELOOP 40 /* Too many symbolic links encountered */ -#define EWOULDBLOCK EAGAIN /* Operation would block */ -#define ENOMSG 42 /* No message of desired type */ -#define EIDRM 43 /* Identifier removed */ -#define ECHRNG 44 /* Channel number out of range */ -#define EL2NSYNC 45 /* Level 2 not synchronized */ -#define EL3HLT 46 /* Level 3 halted */ -#define EL3RST 47 /* Level 3 reset */ -#define ELNRNG 48 /* Link number out of range */ -#define EUNATCH 49 /* Protocol driver not attached */ -#define ENOCSI 50 /* No CSI structure available */ -#define EL2HLT 51 /* Level 2 halted */ -#define EBADE 52 /* Invalid exchange */ -#define EBADR 53 /* Invalid request descriptor */ -#define EXFULL 54 /* Exchange full */ -#define ENOANO 55 /* No anode */ -#define EBADRQC 56 /* Invalid request code */ -#define EBADSLT 57 /* Invalid slot */ - -#define EDEADLOCK EDEADLK - -#define EBFONT 59 /* Bad font file format */ -#define ENOSTR 60 /* Device not a stream */ -#define ENODATA 61 /* No data available */ -#define ETIME 62 /* Timer expired */ -#define ENOSR 63 /* Out of streams resources */ -#define ENONET 64 /* Machine is not on the network */ -#define ENOPKG 65 /* Package not installed */ -#define EREMOTE 66 /* Object is remote */ -#define ENOLINK 67 /* Link has been severed */ -#define EADV 68 /* Advertise error */ -#define ESRMNT 69 /* Srmount error */ -#define ECOMM 70 /* Communication error on send */ -#define EPROTO 71 /* Protocol error */ -#define EMULTIHOP 72 /* Multihop attempted */ -#define EDOTDOT 73 /* RFS specific error */ -#define EBADMSG 74 /* Not a data message */ -#define EOVERFLOW 75 /* Value too large 
for defined data type */ -#define ENOTUNIQ 76 /* Name not unique on network */ -#define EBADFD 77 /* File descriptor in bad state */ -#define EREMCHG 78 /* Remote address changed */ -#define ELIBACC 79 /* Can not access a needed shared library */ -#define ELIBBAD 80 /* Accessing a corrupted shared library */ -#define ELIBSCN 81 /* .lib section in a.out corrupted */ -#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ -#define ELIBEXEC 83 /* Cannot exec a shared library directly */ -#undef EILSEQ -#define EILSEQ 84 /* Illegal byte sequence */ -#define ERESTART 85 /* Interrupted system call should be restarted */ -#define ESTRPIPE 86 /* Streams pipe error */ -#define EUSERS 87 /* Too many users */ -#define ENOTSOCK 88 /* Socket operation on non-socket */ -#define EDESTADDRREQ 89 /* Destination address required */ -#define EMSGSIZE 90 /* Message too long */ -#define EPROTOTYPE 91 /* Protocol wrong type for socket */ -#define ENOPROTOOPT 92 /* Protocol not available */ -#define EPROTONOSUPPORT 93 /* Protocol not supported */ -#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ -#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ -#define EPFNOSUPPORT 96 /* Protocol family not supported */ -#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ -#define EADDRINUSE 98 /* Address already in use */ -#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ -#define ENETDOWN 100 /* Network is down */ -#define ENETUNREACH 101 /* Network is unreachable */ -#define ENETRESET 102 /* Network dropped connection because of reset */ -#define ECONNABORTED 103 /* Software caused connection abort */ -#define ECONNRESET 104 /* Connection reset by peer */ -#define ENOBUFS 105 /* No buffer space available */ -#define EISCONN 106 /* Transport endpoint is already connected */ -#define ENOTCONN 107 /* Transport endpoint is not connected */ -#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ 
-#define ETOOMANYREFS 109 /* Too many references: cannot splice */ -#define ETIMEDOUT 110 /* Connection timed out */ -#define ECONNREFUSED 111 /* Connection refused */ -#define EHOSTDOWN 112 /* Host is down */ -#define EHOSTUNREACH 113 /* No route to host */ -#define EALREADY 114 /* Operation already in progress */ -#define EINPROGRESS 115 /* Operation now in progress */ -#define ESTALE 116 /* Stale NFS file handle */ -#define EUCLEAN 117 /* Structure needs cleaning */ -#define ENOTNAM 118 /* Not a XENIX named type file */ -#define ENAVAIL 119 /* No XENIX semaphores available */ -#define EISNAM 120 /* Is a named type file */ -#define EREMOTEIO 121 /* Remote I/O error */ -#define EDQUOT 122 /* Quota exceeded */ - -#define ENOMEDIUM 123 /* No medium found */ -#define EMEDIUMTYPE 124 /* Wrong medium type */ - -/* Should never be seen by user programs */ -#define ERESTARTSYS 512 -#define ERESTARTNOINTR 513 -#define ERESTARTNOHAND 514 /* restart if no handler.. */ -#define ENOIOCTLCMD 515 /* No ioctl command */ - -/* Defined for the NFSv3 protocol */ -#define EBADHANDLE 521 /* Illegal NFS file handle */ -#define ENOTSYNC 522 /* Update synchronization mismatch */ -#define EBADCOOKIE 523 /* Cookie is stale */ -#define ENOTSUPP 524 /* Operation is not supported */ -#define ETOOSMALL 525 /* Buffer or request is too small */ -#define ESERVERFAULT 526 /* An untranslatable error occurred */ -#define EBADTYPE 527 /* Type not supported by server */ -#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ - - - -/* open/fcntl - O_SYNC is only implemented on blocks devices and on files - located on an ext2 file system */ -#define O_ACCMODE 0003 -#define O_RDONLY 00 -#define O_WRONLY 01 -#define O_RDWR 02 -#define O_CREAT 0100 /* not fcntl */ -#define O_EXCL 0200 /* not fcntl */ -#define O_NOCTTY 0400 /* not fcntl */ -#define O_TRUNC 01000 /* not fcntl */ -#define O_APPEND 02000 -#define O_NONBLOCK 04000 -#define O_NDELAY O_NONBLOCK -#define O_SYNC 010000 
-#define FASYNC 020000 /* fcntl, for BSD compatibility */ -#define O_DIRECT 040000 /* direct disk access hint */ -#define O_LARGEFILE 0100000 -#define O_DIRECTORY 0200000 /* must be a directory */ -#define O_NOFOLLOW 0400000 /* don't follow links */ - -#define F_DUPFD 0 /* dup */ -#define F_GETFD 1 /* get close_on_exec */ -#define F_SETFD 2 /* set/clear close_on_exec */ -#define F_GETFL 3 /* get file->f_flags */ -#define F_SETFL 4 /* set file->f_flags */ -#define F_GETLK 5 -#define F_SETLK 6 -#define F_SETLKW 7 - -#define F_SETOWN 8 /* for sockets. */ -#define F_GETOWN 9 /* for sockets. */ -#define F_SETSIG 10 /* for sockets. */ -#define F_GETSIG 11 /* for sockets. */ - -#define F_GETLK64 12 /* using 'struct flock64' */ -#define F_SETLK64 13 -#define F_SETLKW64 14 - -/* for F_[GET|SET]FL */ -#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ - -/* for posix fcntl() and lockf() */ -#define F_RDLCK 0 -#define F_WRLCK 1 -#define F_UNLCK 2 - -/* for old implementation of bsd flock () */ -#define F_EXLCK 4 /* or 3 */ -#define F_SHLCK 8 /* or 4 */ - -/* for leases */ -#define F_INPROGRESS 16 - -/* operations for bsd flock(), also used by the kernel implementation */ -#define LOCK_SH 1 /* shared lock */ -#define LOCK_EX 2 /* exclusive lock */ -#define LOCK_NB 4 /* or'd with one of the above to prevent - blocking */ -#define LOCK_UN 8 /* remove lock */ - -#define LOCK_MAND 32 /* This is a mandatory flock */ -#define LOCK_READ 64 /* ... Which allows concurrent read operations */ -#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ -#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */ - -#endif - - -#ifndef LIBCFS_SIGNAL_H -#define LIBCFS_SIGNAL_H - -/* - * signal values ... 
- */ - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX (_NSIG-1) - -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. 
- */ -#define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ -#define SA_SIGINFO 0x00000004 -#define SA_ONSTACK 0x08000000 -#define SA_RESTART 0x10000000 -#define SA_NODEFER 0x40000000 -#define SA_RESETHAND 0x80000000 - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND -#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */ - -#define SA_RESTORER 0x04000000 - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 - - -#define sigmask(sig) ((__u32)1 << ((sig) - 1)) - -#endif // LIBCFS_SIGNAL_H diff --git a/lnet/include/lnet/.cvsignore b/lnet/include/lnet/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am deleted file mode 100644 index 33288749468458fc55a00f6e6551695922158148..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -lnetdir=$(includedir)/lnet - -SUBDIRS := linux -if DARWIN -SUBDIRS += darwin -endif -DIST_SUBDIRS := $(SUBDIRS) - -EXTRA_DIST = api.h api-support.h \ - lib-lnet.h lib-types.h lnet.h lnetctl.h types.h \ - socklnd.h ptllnd.h ptllnd_wire.h lnetst.h diff --git a/lnet/include/lnet/api-support.h b/lnet/include/lnet/api-support.h deleted file mode 100644 index 717559fd92685faca1230fa3deffbaddd646f7b4..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/api-support.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __LNET_API_SUPPORT_H__ -#define __LNET_API_SUPPORT_H__ - -#if defined(__linux__) -#include <lnet/linux/api-support.h> -#elif defined(__APPLE__) -#include <lnet/darwin/api-support.h> -#elif defined(__WINNT__) -#include <lnet/winnt/api-support.h> -#else -#error Unsupported Operating System -#endif - -#include <lnet/types.h> 
-#include <libcfs/kp30.h> -#include <lnet/lnet.h> - -#endif diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h deleted file mode 100644 index adbe2e28bf85bc312a1b1de5dfb7a13a0916f709..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/api.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef __LNET_API_H__ -#define __LNET_API_H__ - -#include <lnet/types.h> - -int LNetInit(void); -void LNetFini(void); - -int LNetNIInit(lnet_pid_t requested_pid); -int LNetNIFini(void); - -int LNetGetId(unsigned int index, lnet_process_id_t *id); -int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order); -int LNetCtl(unsigned int cmd, void *arg); -void LNetSnprintHandle (char *str, int str_len, lnet_handle_any_t handle); - -/* - * Portals - */ -int LNetSetLazyPortal(int portal); -int LNetClearLazyPortal(int portal); - -/* - * Match entries - */ -int LNetMEAttach(unsigned int portal, - lnet_process_id_t match_id_in, - __u64 match_bits_in, - __u64 ignore_bits_in, - lnet_unlink_t unlink_in, - lnet_ins_pos_t pos_in, - lnet_handle_me_t *handle_out); - -int LNetMEInsert(lnet_handle_me_t current_in, - lnet_process_id_t match_id_in, - __u64 match_bits_in, - __u64 ignore_bits_in, - lnet_unlink_t unlink_in, - lnet_ins_pos_t position_in, - lnet_handle_me_t *handle_out); - -int LNetMEUnlink(lnet_handle_me_t current_in); - -/* - * Memory descriptors - */ -int LNetMDAttach(lnet_handle_me_t current_in, - lnet_md_t md_in, - lnet_unlink_t unlink_in, - lnet_handle_md_t *handle_out); - -int LNetMDBind(lnet_md_t md_in, - lnet_unlink_t unlink_in, - lnet_handle_md_t *handle_out); - -int LNetMDUnlink(lnet_handle_md_t md_in); - -/* - * Event queues - */ -int LNetEQAlloc(unsigned int count_in, - lnet_eq_handler_t handler, - lnet_handle_eq_t *handle_out); - -int LNetEQFree(lnet_handle_eq_t eventq_in); - -int LNetEQGet(lnet_handle_eq_t eventq_in, - lnet_event_t *event_out); - - -int LNetEQWait(lnet_handle_eq_t eventq_in, - lnet_event_t *event_out); - -int LNetEQPoll(lnet_handle_eq_t 
*eventqs_in, - int neq_in, - int timeout_ms, - lnet_event_t *event_out, - int *which_eq_out); - -/* - * Data movement - */ -int LNetPut(lnet_nid_t self, - lnet_handle_md_t md_in, - lnet_ack_req_t ack_req_in, - lnet_process_id_t target_in, - unsigned int portal_in, - __u64 match_bits_in, - unsigned int offset_in, - __u64 hdr_data_in); - -int LNetGet(lnet_nid_t self, - lnet_handle_md_t md_in, - lnet_process_id_t target_in, - unsigned int portal_in, - __u64 match_bits_in, - unsigned int offset_in); - - -int LNetSetAsync(lnet_process_id_t id, int nasync); - -#ifndef __KERNEL__ -/* Temporary workaround to allow uOSS and test programs force server - * mode in userspace. See comments near ln_server_mode_flag in - * lnet/lib-types.h */ - -void lnet_server_mode(); -#endif - -#endif diff --git a/lnet/include/lnet/darwin/.cvsignore b/lnet/include/lnet/darwin/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/include/lnet/darwin/Makefile.am b/lnet/include/lnet/darwin/Makefile.am deleted file mode 100644 index 409e1593f24dea6b9689354d8d7c05ad1772302b..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h diff --git a/lnet/include/lnet/darwin/api-support.h b/lnet/include/lnet/darwin/api-support.h deleted file mode 100644 index c411f1730701c0fb4a98bcf98f863139415bf1df..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/api-support.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __DARWIN_API_SUPPORT_H__ -#define __DARWIN_API_SUPPORT_H__ - -#ifndef __LNET_API_SUPPORT_H__ -#error Do not #include this file directly. 
#include <portals/api-support.h> instead -#endif - -#ifndef __KERNEL__ -# include <stdio.h> -# include <stdlib.h> -# include <unistd.h> -# include <time.h> - -/* Lots of POSIX dependencies to support PtlEQWait_timeout */ -# include <signal.h> -# include <setjmp.h> -# include <time.h> - -# ifdef HAVE_LIBREADLINE -# include <readline/readline.h> -typedef VFunction rl_vintfunc_t; -typedef VFunction rl_voidfunc_t; -# endif -#endif - - -#endif diff --git a/lnet/include/lnet/darwin/lib-lnet.h b/lnet/include/lnet/darwin/lib-lnet.h deleted file mode 100644 index af4bc5de9b46cca2aa47b3dda83338cb3d46e3f4..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/lib-lnet.h +++ /dev/null @@ -1,16 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_DARWIN_LIB_LNET_H__ -#define __LNET_DARWIN_LIB_LNET_H__ - -#ifndef __LNET_LIB_LNET_H__ -#error Do not #include this file directly. #include <lnet/lib-lnet.h> instead -#endif - -#include <string.h> -#include <libcfs/libcfs.h> - -#undef LNET_ROUTER - -#endif diff --git a/lnet/include/lnet/darwin/lib-types.h b/lnet/include/lnet/darwin/lib-types.h deleted file mode 100644 index f1552fb7ba690fe3d55c6d15778f22ace7dbc497..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/lib-types.h +++ /dev/null @@ -1,27 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_DARWIN_LIB_TYPES_H__ -#define __LNET_DARWIN_LIB_TYPES_H__ - -#ifndef __LNET_LIB_TYPES_H__ -#error Do not #include this file directly. #include <lnet/lib-types.h> instead -#endif - -#include <sys/types.h> -#include <libcfs/libcfs.h> -#include <libcfs/list.h> - -/* - * XXX Liang: - * - * Temporary fix, because lnet_me_free()->cfs_free->FREE() can be blocked in xnu, - * at then same time we've taken LNET_LOCK(), which is a spinlock. 
- * by using LNET_USE_LIB_FREELIST, we can avoid calling of FREE(). - * - * A better solution is moving lnet_me_free() out from LNET_LOCK, it's not hard - * but need to be very careful and take some time. - */ -#define LNET_USE_LIB_FREELIST - -#endif diff --git a/lnet/include/lnet/darwin/lnet.h b/lnet/include/lnet/darwin/lnet.h deleted file mode 100644 index 82a6127b5228ab18e3b094e8af02b4acf517c4af..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/lnet.h +++ /dev/null @@ -1,20 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_DARWIN_LNET_H__ -#define __LNET_DARWIN_LNET_H__ - -#ifndef __LNET_H__ -#error Do not #include this file directly. #include <lnet/lnet.h> instead -#endif - -/* - * lnet.h - * - * User application interface file - */ - -#include <sys/types.h> -#include <sys/uio.h> - -#endif diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h deleted file mode 100644 index a93354babb31969154fed7a3d371572e8957c751..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lib-lnet.h +++ /dev/null @@ -1,685 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib-lnet.h - * - * Top level include for library side routines - */ - -#ifndef __LNET_LIB_LNET_H__ -#define __LNET_LIB_LNET_H__ - -#if defined(__linux__) -#include <lnet/linux/lib-lnet.h> -#elif defined(__APPLE__) -#include <lnet/darwin/lib-lnet.h> -#elif defined(__WINNT__) -#include <lnet/winnt/lib-lnet.h> -#else -#error Unsupported Operating System -#endif - -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-types.h> - -extern lnet_t the_lnet; /* THE network */ - -static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh) -{ - return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_NONE.wh_interface_cookie && - wh->wh_object_cookie == 
LNET_WIRE_HANDLE_NONE.wh_object_cookie); -} - -static inline int lnet_md_exhausted (lnet_libmd_t *md) -{ - return (md->md_threshold == 0 || - ((md->md_options & LNET_MD_MAX_SIZE) != 0 && - md->md_offset + md->md_max_size > md->md_length)); -} - -static inline int lnet_md_unlinkable (lnet_libmd_t *md) -{ - /* Should unlink md when its refcount is 0 and either: - * - md has been flagged for deletion (by auto unlink or LNetM[DE]Unlink, - * in the latter case md may not be exhausted). - * - auto unlink is on and md is exhausted. - */ - if (md->md_refcount != 0) - return 0; - - if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0) - return 1; - - return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 && - lnet_md_exhausted(md)); -} - -#ifdef __KERNEL__ -#define LNET_LOCK() spin_lock(&the_lnet.ln_lock) -#define LNET_UNLOCK() spin_unlock(&the_lnet.ln_lock) -#define LNET_MUTEX_DOWN(m) mutex_down(m) -#define LNET_MUTEX_UP(m) mutex_up(m) -#else -# ifndef HAVE_LIBPTHREAD -#define LNET_SINGLE_THREADED_LOCK(l) \ -do { \ - LASSERT ((l) == 0); \ - (l) = 1; \ -} while (0) - -#define LNET_SINGLE_THREADED_UNLOCK(l) \ -do { \ - LASSERT ((l) == 1); \ - (l) = 0; \ -} while (0) - -#define LNET_LOCK() LNET_SINGLE_THREADED_LOCK(the_lnet.ln_lock) -#define LNET_UNLOCK() LNET_SINGLE_THREADED_UNLOCK(the_lnet.ln_lock) -#define LNET_MUTEX_DOWN(m) LNET_SINGLE_THREADED_LOCK(*(m)) -#define LNET_MUTEX_UP(m) LNET_SINGLE_THREADED_UNLOCK(*(m)) -# else -#define LNET_LOCK() pthread_mutex_lock(&the_lnet.ln_lock) -#define LNET_UNLOCK() pthread_mutex_unlock(&the_lnet.ln_lock) -#define LNET_MUTEX_DOWN(m) pthread_mutex_lock(m) -#define LNET_MUTEX_UP(m) pthread_mutex_unlock(m) -# endif -#endif - -#define MAX_PORTALS 64 - -#ifdef LNET_USE_LIB_FREELIST - -#define MAX_MES 2048 -#define MAX_MDS 2048 -#define MAX_MSGS 2048 /* Outstanding messages */ -#define MAX_EQS 512 - -static inline void * -lnet_freelist_alloc (lnet_freelist_t *fl) -{ - /* ALWAYS called with liblock held */ - lnet_freeobj_t *o; - - if (list_empty 
(&fl->fl_list)) - return (NULL); - - o = list_entry (fl->fl_list.next, lnet_freeobj_t, fo_list); - list_del (&o->fo_list); - return ((void *)&o->fo_contents); -} - -static inline void -lnet_freelist_free (lnet_freelist_t *fl, void *obj) -{ - /* ALWAYS called with liblock held */ - lnet_freeobj_t *o = list_entry (obj, lnet_freeobj_t, fo_contents); - - list_add (&o->fo_list, &fl->fl_list); -} - - -static inline lnet_eq_t * -lnet_eq_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_eq_t *eq; - - LNET_LOCK(); - eq = (lnet_eq_t *)lnet_freelist_alloc(&the_lnet.ln_free_eqs); - LNET_UNLOCK(); - - return (eq); -} - -static inline void -lnet_eq_free (lnet_eq_t *eq) -{ - /* ALWAYS called with liblock held */ - lnet_freelist_free(&the_lnet.ln_free_eqs, eq); -} - -static inline lnet_libmd_t * -lnet_md_alloc (lnet_md_t *umd) -{ - /* NEVER called with liblock held */ - lnet_libmd_t *md; - - LNET_LOCK(); - md = (lnet_libmd_t *)lnet_freelist_alloc(&the_lnet.ln_free_mds); - LNET_UNLOCK(); - - return (md); -} - -static inline void -lnet_md_free (lnet_libmd_t *md) -{ - /* ALWAYS called with liblock held */ - lnet_freelist_free (&the_lnet.ln_free_mds, md); -} - -static inline lnet_me_t * -lnet_me_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_me_t *me; - - LNET_LOCK(); - me = (lnet_me_t *)lnet_freelist_alloc(&the_lnet.ln_free_mes); - LNET_UNLOCK(); - - return (me); -} - -static inline void -lnet_me_free (lnet_me_t *me) -{ - /* ALWAYS called with liblock held */ - lnet_freelist_free (&the_lnet.ln_free_mes, me); -} - -static inline lnet_msg_t * -lnet_msg_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_msg_t *msg; - - LNET_LOCK(); - msg = (lnet_msg_t *)lnet_freelist_alloc(&the_lnet.ln_free_msgs); - LNET_UNLOCK(); - - if (msg != NULL) { - /* NULL pointers, clear flags etc */ - memset (msg, 0, sizeof (*msg)); -#ifdef CRAY_XT3 - msg->msg_ev.uid = LNET_UID_ANY; -#endif - } - return(msg); -} - -static inline void -lnet_msg_free (lnet_msg_t *msg) -{ - /* 
ALWAYS called with liblock held */ - LASSERT (!msg->msg_onactivelist); - lnet_freelist_free(&the_lnet.ln_free_msgs, msg); -} - -#else - -static inline lnet_eq_t * -lnet_eq_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_eq_t *eq; - - LIBCFS_ALLOC(eq, sizeof(*eq)); - return (eq); -} - -static inline void -lnet_eq_free (lnet_eq_t *eq) -{ - /* ALWAYS called with liblock held */ - LIBCFS_FREE(eq, sizeof(*eq)); -} - -static inline lnet_libmd_t * -lnet_md_alloc (lnet_md_t *umd) -{ - /* NEVER called with liblock held */ - lnet_libmd_t *md; - int size; - unsigned int niov; - - if ((umd->options & LNET_MD_KIOV) != 0) { - niov = umd->length; - size = offsetof(lnet_libmd_t, md_iov.kiov[niov]); - } else { - niov = ((umd->options & LNET_MD_IOVEC) != 0) ? - umd->length : 1; - size = offsetof(lnet_libmd_t, md_iov.iov[niov]); - } - - LIBCFS_ALLOC(md, size); - - if (md != NULL) { - /* Set here in case of early free */ - md->md_options = umd->options; - md->md_niov = niov; - } - - return (md); -} - -static inline void -lnet_md_free (lnet_libmd_t *md) -{ - /* ALWAYS called with liblock held */ - int size; - - if ((md->md_options & LNET_MD_KIOV) != 0) - size = offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]); - else - size = offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]); - - LIBCFS_FREE(md, size); -} - -static inline lnet_me_t * -lnet_me_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_me_t *me; - - LIBCFS_ALLOC(me, sizeof(*me)); - return (me); -} - -static inline void -lnet_me_free(lnet_me_t *me) -{ - /* ALWAYS called with liblock held */ - LIBCFS_FREE(me, sizeof(*me)); -} - -static inline lnet_msg_t * -lnet_msg_alloc(void) -{ - /* NEVER called with liblock held */ - lnet_msg_t *msg; - - LIBCFS_ALLOC(msg, sizeof(*msg)); - - if (msg != NULL) { - /* NULL pointers, clear flags etc */ - memset (msg, 0, sizeof (*msg)); -#ifdef CRAY_XT3 - msg->msg_ev.uid = LNET_UID_ANY; -#endif - } - return (msg); -} - -static inline void -lnet_msg_free(lnet_msg_t *msg) -{ - /* 
ALWAYS called with liblock held */ - LASSERT (!msg->msg_onactivelist); - LIBCFS_FREE(msg, sizeof(*msg)); -} -#endif - -extern lnet_libhandle_t *lnet_lookup_cookie (__u64 cookie, int type); -extern void lnet_initialise_handle (lnet_libhandle_t *lh, int type); -extern void lnet_invalidate_handle (lnet_libhandle_t *lh); - -static inline void -lnet_eq2handle (lnet_handle_eq_t *handle, lnet_eq_t *eq) -{ - if (eq == NULL) { - *handle = LNET_EQ_NONE; - return; - } - - handle->cookie = eq->eq_lh.lh_cookie; -} - -static inline lnet_eq_t * -lnet_handle2eq (lnet_handle_eq_t *handle) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie, - LNET_COOKIE_TYPE_EQ); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_eq_t, eq_lh)); -} - -static inline void -lnet_md2handle (lnet_handle_md_t *handle, lnet_libmd_t *md) -{ - handle->cookie = md->md_lh.lh_cookie; -} - -static inline lnet_libmd_t * -lnet_handle2md (lnet_handle_md_t *handle) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie, - LNET_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_libmd_t, md_lh)); -} - -static inline lnet_libmd_t * -lnet_wire_handle2md (lnet_handle_wire_t *wh) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh; - - if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie) - return (NULL); - - lh = lnet_lookup_cookie(wh->wh_object_cookie, - LNET_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_libmd_t, md_lh)); -} - -static inline void -lnet_me2handle (lnet_handle_me_t *handle, lnet_me_t *me) -{ - handle->cookie = me->me_lh.lh_cookie; -} - -static inline lnet_me_t * -lnet_handle2me (lnet_handle_me_t *handle) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie, - LNET_COOKIE_TYPE_ME); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_me_t, 
me_lh)); -} - -static inline void -lnet_peer_addref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - lp->lp_refcount++; -} - -extern void lnet_destroy_peer_locked(lnet_peer_t *lp); - -static inline void -lnet_peer_decref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - lp->lp_refcount--; - if (lp->lp_refcount == 0) - lnet_destroy_peer_locked(lp); -} - -static inline int -lnet_isrouter(lnet_peer_t *lp) -{ - return lp->lp_rtr_refcount != 0; -} - -static inline void -lnet_ni_addref_locked(lnet_ni_t *ni) -{ - LASSERT (ni->ni_refcount > 0); - ni->ni_refcount++; -} - -static inline void -lnet_ni_addref(lnet_ni_t *ni) -{ - LNET_LOCK(); - lnet_ni_addref_locked(ni); - LNET_UNLOCK(); -} - -static inline void -lnet_ni_decref_locked(lnet_ni_t *ni) -{ - LASSERT (ni->ni_refcount > 0); - ni->ni_refcount--; - if (ni->ni_refcount == 0) - list_add_tail(&ni->ni_list, &the_lnet.ln_zombie_nis); -} - -static inline void -lnet_ni_decref(lnet_ni_t *ni) -{ - LNET_LOCK(); - lnet_ni_decref_locked(ni); - LNET_UNLOCK(); -} - -static inline lnet_nid_t -lnet_ptlcompat_srcnid(lnet_nid_t src, lnet_nid_t dst) -{ - /* Give myself a portals srcnid if I'm sending to portals */ - if (the_lnet.ln_ptlcompat > 0 && - LNET_NIDNET(dst) == 0) - return LNET_MKNID(0, LNET_NIDADDR(src)); - - return src; -} - -static inline int -lnet_ptlcompat_matchnid(lnet_nid_t lnet_nid, lnet_nid_t ptl_nid) -{ - return ((ptl_nid == lnet_nid) || - (the_lnet.ln_ptlcompat > 0 && - LNET_NIDNET(ptl_nid) == 0 && - LNET_NETTYP(LNET_NIDNET(lnet_nid)) != LOLND && - LNET_NIDADDR(ptl_nid) == LNET_NIDADDR(lnet_nid))); -} - -static inline int -lnet_ptlcompat_matchnet(__u32 lnet_net, __u32 ptl_net) -{ - return ((ptl_net == lnet_net) || - (the_lnet.ln_ptlcompat > 0 && - ptl_net == 0 && - LNET_NETTYP(lnet_net) != LOLND)); -} - -static inline struct list_head * -lnet_nid2peerhash (lnet_nid_t nid) -{ - unsigned int idx = LNET_NIDADDR(nid) % LNET_PEER_HASHSIZE; - - return &the_lnet.ln_peer_hash[idx]; -} - -extern lnd_t 
the_lolnd; - -#ifndef __KERNEL__ -/* unconditional registration */ -#define LNET_REGISTER_ULND(lnd) \ -do { \ - extern lnd_t lnd; \ - \ - lnet_register_lnd(&(lnd)); \ -} while (0) - -/* conditional registration */ -#define LNET_REGISTER_ULND_IF_PRESENT(lnd) \ -do { \ - extern lnd_t lnd __attribute__ ((weak, alias("the_lolnd"))); \ - \ - if (&(lnd) != &the_lolnd) \ - lnet_register_lnd(&(lnd)); \ -} while (0) -#endif - -#ifdef CRAY_XT3 -inline static void -lnet_set_msg_uid(lnet_ni_t *ni, lnet_msg_t *msg, lnet_uid_t uid) -{ - LASSERT (msg->msg_ev.uid == LNET_UID_ANY); - msg->msg_ev.uid = uid; -} -#endif - -extern lnet_ni_t *lnet_nid2ni_locked (lnet_nid_t nid); -extern lnet_ni_t *lnet_net2ni_locked (__u32 net); -static inline lnet_ni_t * -lnet_net2ni (__u32 net) -{ - lnet_ni_t *ni; - - LNET_LOCK(); - ni = lnet_net2ni_locked(net); - LNET_UNLOCK(); - - return ni; -} - -int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, time_t when); -int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid); -int lnet_check_routes(void); -int lnet_del_route(__u32 net, lnet_nid_t gw_nid); -void lnet_destroy_routes(void); -int lnet_get_route(int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive); -void lnet_proc_init(void); -void lnet_proc_fini(void); -void lnet_init_rtrpools(void); -int lnet_alloc_rtrpools(int im_a_router); -void lnet_free_rtrpools(void); -lnet_remotenet_t *lnet_find_net_locked (__u32 net); - -int lnet_islocalnid(lnet_nid_t nid); -int lnet_islocalnet(__u32 net); - -void lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev); -void lnet_enq_event_locked(lnet_eq_t *eq, lnet_event_t *ev); -void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target, - unsigned int offset, unsigned int len); -int lnet_send(lnet_nid_t nid, lnet_msg_t *msg); -void lnet_return_credits_locked (lnet_msg_t *msg); -void lnet_match_blocked_msg(lnet_libmd_t *md); -int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr, - lnet_nid_t fromnid, void 
*private, int rdma_req); -void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int offset, unsigned int mlen, unsigned int rlen); -lnet_msg_t *lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *get_msg); -void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len); -void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc); - -char *lnet_msgtyp2str (int type); -void lnet_print_hdr (lnet_hdr_t * hdr); -int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold); - -unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov); -int lnet_extract_iov (int dst_niov, struct iovec *dst, - int src_niov, struct iovec *src, - unsigned int offset, unsigned int len); - -unsigned int lnet_kiov_nob (unsigned int niov, lnet_kiov_t *iov); -int lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len); - -void lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, - unsigned int doffset, - unsigned int nsiov, struct iovec *siov, - unsigned int soffset, unsigned int nob); -void lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, - unsigned int iovoffset, - unsigned int nkiov, lnet_kiov_t *kiov, - unsigned int kiovoffset, unsigned int nob); -void lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, - unsigned int kiovoffset, - unsigned int niov, struct iovec *iov, - unsigned int iovoffset, unsigned int nob); -void lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov, - unsigned int doffset, - unsigned int nskiov, lnet_kiov_t *skiov, - unsigned int soffset, unsigned int nob); - -static inline void -lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset, - unsigned int nsiov, struct iovec *siov, unsigned int soffset, - unsigned int nob) -{ - struct iovec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen}; - - lnet_copy_iov2iov(1, &diov, doffset, - nsiov, siov, soffset, nob); -} - -static inline void -lnet_copy_kiov2flat(int dlen, void 
*dest, unsigned int doffset, - unsigned int nsiov, lnet_kiov_t *skiov, unsigned int soffset, - unsigned int nob) -{ - struct iovec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen}; - - lnet_copy_kiov2iov(1, &diov, doffset, - nsiov, skiov, soffset, nob); -} - -static inline void -lnet_copy_flat2iov(unsigned int ndiov, struct iovec *diov, unsigned int doffset, - int slen, void *src, unsigned int soffset, unsigned int nob) -{ - struct iovec siov = {/*.iov_base = */ src, /*.iov_len = */slen}; - lnet_copy_iov2iov(ndiov, diov, doffset, - 1, &siov, soffset, nob); -} - -static inline void -lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov, unsigned int doffset, - int slen, void *src, unsigned int soffset, unsigned int nob) -{ - struct iovec siov = {/* .iov_base = */ src, /* .iov_len = */ slen}; - lnet_copy_iov2kiov(ndiov, dkiov, doffset, - 1, &siov, soffset, nob); -} - -void lnet_me_unlink(lnet_me_t *me); - -void lnet_md_unlink(lnet_libmd_t *md); -void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd); - -void lnet_register_lnd(lnd_t *lnd); -void lnet_unregister_lnd(lnd_t *lnd); -int lnet_set_ip_niaddr (lnet_ni_t *ni); - -#ifdef __KERNEL__ -int lnet_connect(cfs_socket_t **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port); -void lnet_connect_console_error(int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int port); -int lnet_count_acceptor_nis(lnet_ni_t **first_ni); -int lnet_accept(lnet_ni_t *blind_ni, cfs_socket_t *sock, __u32 magic); -int lnet_acceptor_timeout(void); -int lnet_acceptor_port(void); -#endif - -#ifdef HAVE_LIBPTHREAD -int lnet_count_acceptor_nis(lnet_ni_t **first_ni); -int lnet_acceptor_port(void); -#endif - -int lnet_acceptor_start(void); -void lnet_acceptor_stop(void); - -int lnet_peers_start_down(void); -int lnet_router_checker_start(void); -void lnet_router_checker_stop(void); - -int lnet_ping_target_init(void); -void lnet_ping_target_fini(void); -int lnet_ping(lnet_process_id_t id, int timeout_ms, - 
lnet_process_id_t *ids, int n_ids); - -int lnet_parse_ip2nets (char **networksp, char *ip2nets); -int lnet_parse_routes (char *route_str, int *im_a_router); -int lnet_parse_networks (struct list_head *nilist, char *networks); - -int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid); -lnet_peer_t *lnet_find_peer_locked (lnet_nid_t nid); -void lnet_clear_peer_table(void); -void lnet_destroy_peer_table(void); -int lnet_create_peer_table(void); -void lnet_debug_peer(lnet_nid_t nid); - -#endif diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h deleted file mode 100644 index 7a06a286e3a21f29f8e35e164b50b2c0e81df3a3..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lib-types.h +++ /dev/null @@ -1,565 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * p30/lib-types.h - * - * Types used by the library side routines that do not need to be - * exposed to the user application - */ - -#ifndef __LNET_LIB_TYPES_H__ -#define __LNET_LIB_TYPES_H__ - -#if defined(__linux__) -#include <lnet/linux/lib-types.h> -#elif defined(__APPLE__) -#include <lnet/darwin/lib-types.h> -#elif defined(__WINNT__) -#include <lnet/winnt/lib-types.h> -#else -#error Unsupported Operating System -#endif - -#include <libcfs/libcfs.h> -#include <libcfs/list.h> -#include <lnet/types.h> - -#define WIRE_ATTR __attribute__((packed)) - -/* The wire handle's interface cookie only matches one network interface in - * one epoch (i.e. new cookie when the interface restarts or the node - * reboots). The object cookie only matches one object on that interface - * during that object's lifetime (i.e. no cookie re-use). */ -typedef struct { - __u64 wh_interface_cookie; - __u64 wh_object_cookie; -} WIRE_ATTR lnet_handle_wire_t; - -/* byte-flip insensitive! 
*/ -#define LNET_WIRE_HANDLE_NONE \ -((const lnet_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1}) - -typedef enum { - LNET_MSG_ACK = 0, - LNET_MSG_PUT, - LNET_MSG_GET, - LNET_MSG_REPLY, - LNET_MSG_HELLO, -} lnet_msg_type_t; - -/* The variant fields of the portals message header are aligned on an 8 - * byte boundary in the message header. Note that all types used in these - * wire structs MUST be fixed size and the smaller types are placed at the - * end. */ -typedef struct lnet_ack { - lnet_handle_wire_t dst_wmd; - __u64 match_bits; - __u32 mlength; -} WIRE_ATTR lnet_ack_t; - -typedef struct lnet_put { - lnet_handle_wire_t ack_wmd; - __u64 match_bits; - __u64 hdr_data; - __u32 ptl_index; - __u32 offset; -} WIRE_ATTR lnet_put_t; - -typedef struct lnet_get { - lnet_handle_wire_t return_wmd; - __u64 match_bits; - __u32 ptl_index; - __u32 src_offset; - __u32 sink_length; -} WIRE_ATTR lnet_get_t; - -typedef struct lnet_reply { - lnet_handle_wire_t dst_wmd; -} WIRE_ATTR lnet_reply_t; - -typedef struct lnet_hello { - __u64 incarnation; - __u32 type; -} WIRE_ATTR lnet_hello_t; - -typedef struct { - lnet_nid_t dest_nid; - lnet_nid_t src_nid; - lnet_pid_t dest_pid; - lnet_pid_t src_pid; - __u32 type; /* lnet_msg_type_t */ - __u32 payload_length; /* payload data to follow */ - /*<------__u64 aligned------->*/ - union { - lnet_ack_t ack; - lnet_put_t put; - lnet_get_t get; - lnet_reply_t reply; - lnet_hello_t hello; - } msg; -} WIRE_ATTR lnet_hdr_t; - -/* A HELLO message contains a magic number and protocol version - * code in the header's dest_nid, the peer's NID in the src_nid, and - * LNET_MSG_HELLO in the type field. All other common fields are zero - * (including payload_size; i.e. no payload). - * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is - * running the same protocol and to find out its NID. These LNDs should - * exchange HELLO messages when a connection is first established. 
Individual - * LNDs can put whatever else they fancy in lnet_hdr_t::msg. - */ -typedef struct { - __u32 magic; /* LNET_PROTO_TCP_MAGIC */ - __u16 version_major; /* increment on incompatible change */ - __u16 version_minor; /* increment on compatible change */ -} WIRE_ATTR lnet_magicversion_t; - -/* PROTO MAGIC for LNDs */ -#define LNET_PROTO_IB_MAGIC 0x0be91b91 -#define LNET_PROTO_OPENIB_MAGIC LNET_PROTO_IB_MAGIC -#define LNET_PROTO_IIB_MAGIC LNET_PROTO_IB_MAGIC -#define LNET_PROTO_VIB_MAGIC LNET_PROTO_IB_MAGIC -#define LNET_PROTO_RA_MAGIC 0x0be91b92 -#define LNET_PROTO_QSW_MAGIC 0x0be91b93 -#define LNET_PROTO_TCP_MAGIC 0xeebc0ded -#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */ -#define LNET_PROTO_GM_MAGIC 0x6d797269 /* 'myri'! */ -#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */ -#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 -#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ - -/* Placeholder for a future "unified" protocol across all LNDs */ -/* Current LNDs that receive a request with this magic will respond with a - * "stub" reply using their current protocol */ -#define LNET_PROTO_MAGIC 0x45726963 /* ! 
*/ - - -#define LNET_PROTO_TCP_VERSION_MAJOR 1 -#define LNET_PROTO_TCP_VERSION_MINOR 0 - -/* Acceptor connection request */ -typedef struct { - __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ - __u32 acr_version; /* protocol version */ - __u64 acr_nid; /* target NID */ -} WIRE_ATTR lnet_acceptor_connreq_t; - -#define LNET_PROTO_ACCEPTOR_VERSION 1 - -/* forward refs */ -struct lnet_libmd; - -typedef struct lnet_msg { - struct list_head msg_activelist; - struct list_head msg_list; /* Q for credits/MD */ - - lnet_process_id_t msg_target; - __u32 msg_type; - - unsigned int msg_target_is_router:1; /* sending to a router */ - unsigned int msg_routing:1; /* being forwarded */ - unsigned int msg_ack:1; /* ack on finalize (PUT) */ - unsigned int msg_sending:1; /* outgoing message */ - unsigned int msg_receiving:1; /* being received */ - unsigned int msg_delayed:1; /* had to Q for buffer or tx credit */ - unsigned int msg_txcredit:1; /* taken an NI send credit */ - unsigned int msg_peertxcredit:1; /* taken a peer send credit */ - unsigned int msg_rtrcredit:1; /* taken a globel router credit */ - unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ - unsigned int msg_onactivelist:1; /* on the activelist */ - - struct lnet_peer *msg_txpeer; /* peer I'm sending to */ - struct lnet_peer *msg_rxpeer; /* peer I received from */ - - void *msg_private; - struct lnet_libmd *msg_md; - - unsigned int msg_len; - unsigned int msg_wanted; - unsigned int msg_offset; - unsigned int msg_niov; - struct iovec *msg_iov; - lnet_kiov_t *msg_kiov; - - lnet_event_t msg_ev; - lnet_hdr_t msg_hdr; -} lnet_msg_t; - - -typedef struct lnet_libhandle { - struct list_head lh_hash_chain; - __u64 lh_cookie; -} lnet_libhandle_t; - -#define lh_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -typedef struct lnet_eq { - struct list_head eq_list; - lnet_libhandle_t eq_lh; - lnet_seq_t eq_enq_seq; - lnet_seq_t eq_deq_seq; - unsigned int eq_size; - 
lnet_event_t *eq_events; - int eq_refcount; - lnet_eq_handler_t eq_callback; -} lnet_eq_t; - -typedef struct lnet_me { - struct list_head me_list; - lnet_libhandle_t me_lh; - lnet_process_id_t me_match_id; - unsigned int me_portal; - __u64 me_match_bits; - __u64 me_ignore_bits; - lnet_unlink_t me_unlink; - struct lnet_libmd *me_md; -} lnet_me_t; - -typedef struct lnet_libmd { - struct list_head md_list; - lnet_libhandle_t md_lh; - lnet_me_t *md_me; - char *md_start; - unsigned int md_offset; - unsigned int md_length; - unsigned int md_max_size; - int md_threshold; - int md_refcount; - unsigned int md_options; - unsigned int md_flags; - void *md_user_ptr; - lnet_eq_t *md_eq; - void *md_addrkey; - unsigned int md_niov; /* # frags */ - union { - struct iovec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; - } md_iov; -} lnet_libmd_t; - -#define LNET_MD_FLAG_ZOMBIE (1 << 0) -#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) - -#ifdef LNET_USE_LIB_FREELIST -typedef struct -{ - void *fl_objs; /* single contiguous array of objects */ - int fl_nobjs; /* the number of them */ - int fl_objsize; /* the size (including overhead) of each of them */ - struct list_head fl_list; /* where they are enqueued */ -} lnet_freelist_t; - -typedef struct -{ - struct list_head fo_list; /* enqueue on fl_list */ - void *fo_contents; /* aligned contents */ -} lnet_freeobj_t; -#endif - -typedef struct { - /* info about peers we are trying to fail */ - struct list_head tp_list; /* ln_test_peers */ - lnet_nid_t tp_nid; /* matching nid */ - unsigned int tp_threshold; /* # failures to simulate */ -} lnet_test_peer_t; - -#define LNET_COOKIE_TYPE_MD 1 -#define LNET_COOKIE_TYPE_ME 2 -#define LNET_COOKIE_TYPE_EQ 3 -#define LNET_COOKIE_TYPES 4 -/* LNET_COOKIE_TYPES must be a power of 2, so the cookie type can be - * extracted by masking with (LNET_COOKIE_TYPES - 1) */ - -struct lnet_ni; /* forward ref */ - -typedef struct lnet_lnd -{ - /* fields managed by portals */ - struct list_head lnd_list; /* stash 
in the LND table */ - int lnd_refcount; /* # active instances */ - - /* fields initialised by the LND */ - unsigned int lnd_type; - - int (*lnd_startup) (struct lnet_ni *ni); - void (*lnd_shutdown) (struct lnet_ni *ni); - int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); - - /* In data movement APIs below, payload buffers are described as a set - * of 'niov' fragments which are... - * EITHER - * in virtual memory (struct iovec *iov != NULL) - * OR - * in pages (kernel only: plt_kiov_t *kiov != NULL). - * The LND may NOT overwrite these fragment descriptors. - * An 'offset' and may specify a byte offset within the set of - * fragments to start from - */ - - /* Start sending a preformatted message. 'private' is NULL for PUT and - * GET messages; otherwise this is a response to an incoming message - * and 'private' is the 'private' passed to lnet_parse(). Return - * non-zero for immediate failure, otherwise complete later with - * lnet_finalize() */ - int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); - - /* Start receiving 'mlen' bytes of payload data, skipping the following - * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to - * lnet_parse(). Return non-zero for immedaite failure, otherwise - * complete later with lnet_finalize(). This also gives back a receive - * credit if the LND does flow control. */ - int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); - - /* lnet_parse() has had to delay processing of this message - * (e.g. waiting for a forwarding buffer or send credits). Give the - * LND a chance to free urgently needed resources. If called, return 0 - * for success and do NOT give back a receive credit; that has to wait - * until lnd_recv() gets called. On failure return < 0 and - * release resources; lnd_recv() will not be called. 
*/ - int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, - void **new_privatep); - - /* notification of peer health */ - void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); - -#ifdef __KERNEL__ - /* accept a new connection */ - int (*lnd_accept)(struct lnet_ni *ni, cfs_socket_t *sock); -#else - /* wait for something to happen */ - void (*lnd_wait)(struct lnet_ni *ni, int milliseconds); - - /* ensure non-RDMA messages can be received outside liblustre */ - int (*lnd_setasync)(struct lnet_ni *ni, lnet_process_id_t id, int nasync); - -#ifdef HAVE_LIBPTHREAD - int (*lnd_accept)(struct lnet_ni *ni, int sock); -#endif -#endif -} lnd_t; - -#define LNET_MAX_INTERFACES 16 - -typedef struct lnet_ni { - struct list_head ni_list; /* chain on ln_nis */ - struct list_head ni_txq; /* messages waiting for tx credits */ - int ni_maxtxcredits; /* # tx credits */ - int ni_txcredits; /* # tx credits free */ - int ni_mintxcredits; /* lowest it's been */ - int ni_peertxcredits; /* # per-peer send credits */ - lnet_nid_t ni_nid; /* interface's NID */ - void *ni_data; /* instance-specific data */ - lnd_t *ni_lnd; /* procedural interface */ - int ni_refcount; /* reference count */ - char *ni_interfaces[LNET_MAX_INTERFACES]; /* equivalent interfaces to use */ -} lnet_ni_t; - -typedef struct lnet_peer { - struct list_head lp_hashlist; /* chain on peer hash */ - struct list_head lp_txq; /* messages blocking for tx credits */ - struct list_head lp_rtrq; /* messages blocking for router credits */ - struct list_head lp_rtr_list; /* chain on router list */ - int lp_txcredits; /* # tx credits available */ - int lp_mintxcredits; /* low water mark */ - int lp_rtrcredits; /* # router credits */ - int lp_minrtrcredits; /* low water mark */ - unsigned int lp_alive:1; /* alive/dead? */ - unsigned int lp_notify:1; /* notification outstanding? */ - unsigned int lp_notifylnd:1; /* outstanding notification for LND? 
*/ - unsigned int lp_notifying:1; /* some thread is handling notification */ - unsigned int lp_ping_notsent; /* SEND event outstanding from ping */ - int lp_alive_count; /* # times router went dead<->alive */ - long lp_txqnob; /* bytes queued for sending */ - time_t lp_timestamp; /* time of last aliveness news */ - time_t lp_ping_timestamp; /* time of last ping attempt */ - time_t lp_ping_deadline; /* != 0 if ping reply expected */ - lnet_ni_t *lp_ni; /* interface peer is on */ - lnet_nid_t lp_nid; /* peer's NID */ - int lp_refcount; /* # refs */ - int lp_rtr_refcount; /* # refs from lnet_route_t::lr_gateway */ -} lnet_peer_t; - -typedef struct { - struct list_head lr_list; /* chain on net */ - lnet_peer_t *lr_gateway; /* router node */ -} lnet_route_t; - -typedef struct { - struct list_head lrn_list; /* chain on ln_remote_nets */ - struct list_head lrn_routes; /* routes to me */ - __u32 lrn_net; /* my net number */ - unsigned int lrn_hops; /* how far I am */ -} lnet_remotenet_t; - -typedef struct { - struct list_head rbp_bufs; /* my free buffer pool */ - struct list_head rbp_msgs; /* messages blocking for a buffer */ - int rbp_npages; /* # pages in each buffer */ - int rbp_nbuffers; /* # buffers */ - int rbp_credits; /* # free buffers / blocked messages */ - int rbp_mincredits; /* low water mark */ -} lnet_rtrbufpool_t; - -typedef struct { - struct list_head rb_list; /* chain on rbp_bufs */ - lnet_rtrbufpool_t *rb_pool; /* owning pool */ - lnet_kiov_t rb_kiov[0]; /* the buffer space */ -} lnet_rtrbuf_t; - -typedef struct { - __u32 msgs_alloc; - __u32 msgs_max; - __u32 errors; - __u32 send_count; - __u32 recv_count; - __u32 route_count; - __u32 drop_count; - __u64 send_length; - __u64 recv_length; - __u64 route_length; - __u64 drop_length; -} lnet_counters_t; - -#define LNET_PEER_HASHSIZE 503 /* prime! 
*/ - -#define LNET_NRBPOOLS 3 /* # different router buffer pools */ - -#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL -#define LNET_PROTO_PING_VERSION 1 -typedef struct { - __u32 pi_magic; - __u32 pi_version; - lnet_pid_t pi_pid; - __u32 pi_nnids; - lnet_nid_t pi_nid[0]; -} WIRE_ATTR lnet_ping_info_t; - -/* Options for lnet_portal_t::ptl_options */ -#define LNET_PTL_LAZY (1 << 0) -typedef struct { - struct list_head ptl_ml; /* match list */ - struct list_head ptl_msgq; /* messages blocking for MD */ - __u64 ptl_msgq_version; /* validity stamp */ - unsigned int ptl_options; -} lnet_portal_t; - -/* Router Checker */ -/* < 0 == startup error */ -#define LNET_RC_STATE_SHUTDOWN 0 /* not started */ -#define LNET_RC_STATE_RUNNING 1 /* started up OK */ -#define LNET_RC_STATE_STOPTHREAD 2 /* telling thread to stop */ -#define LNET_RC_STATE_UNLINKING 3 /* unlinking RC MD */ -#define LNET_RC_STATE_UNLINKED 4 /* RC's MD has been unlinked */ - -typedef struct -{ - /* Stuff initialised at LNetInit() */ - int ln_init; /* LNetInit() called? */ - int ln_refcount; /* LNetNIInit/LNetNIFini counter */ - int ln_niinit_self; /* Have I called LNetNIInit myself? */ - - int ln_ptlcompat; /* do I support talking to portals? 
*/ - - struct list_head ln_lnds; /* registered LNDs */ - -#ifdef __KERNEL__ - spinlock_t ln_lock; - cfs_waitq_t ln_waitq; - struct semaphore ln_api_mutex; - struct semaphore ln_lnd_mutex; -#else -# ifndef HAVE_LIBPTHREAD - int ln_lock; - int ln_api_mutex; - int ln_lnd_mutex; -# else - pthread_cond_t ln_cond; - pthread_mutex_t ln_lock; - pthread_mutex_t ln_api_mutex; - pthread_mutex_t ln_lnd_mutex; -# endif -#endif - - /* Stuff initialised at LNetNIInit() */ - - int ln_shutdown; /* shutdown in progress */ - int ln_nportals; /* # portals */ - lnet_portal_t *ln_portals; /* the vector of portals */ - - lnet_pid_t ln_pid; /* requested pid */ - - struct list_head ln_nis; /* LND instances */ - lnet_ni_t *ln_loni; /* the loopback NI */ - lnet_ni_t *ln_eqwaitni; /* NI to wait for events in */ - struct list_head ln_zombie_nis; /* dying LND instances */ - int ln_nzombie_nis; /* # of NIs to wait for */ - - struct list_head ln_remote_nets; /* remote networks with routes to them */ - __u64 ln_remote_nets_version; /* validity stamp */ - - struct list_head ln_routers; /* list of all known routers */ - __u64 ln_routers_version; /* validity stamp */ - - struct list_head *ln_peer_hash; /* NID->peer hash */ - int ln_npeers; /* # peers extant */ - int ln_peertable_version; /* /proc validity stamp */ - - int ln_routing; /* am I a router? 
*/ - lnet_rtrbufpool_t ln_rtrpools[LNET_NRBPOOLS]; /* router buffer pools */ - - int ln_lh_hash_size; /* size of lib handle hash table */ - struct list_head *ln_lh_hash_table; /* all extant lib handles, this interface */ - __u64 ln_next_object_cookie; /* cookie generator */ - __u64 ln_interface_cookie; /* uniquely identifies this ni in this epoch */ - - char *ln_network_tokens; /* space for network names */ - int ln_network_tokens_nob; - - int ln_testprotocompat; /* test protocol compatibility flags */ - - struct list_head ln_finalizeq; /* msgs waiting to complete finalizing */ -#ifdef __KERNEL__ - void **ln_finalizers; /* threads doing finalization */ - int ln_nfinalizers; /* max # threads finalizing */ -#else - int ln_finalizing; -#endif - struct list_head ln_test_peers; /* failure simulation */ - - lnet_handle_md_t ln_ping_target_md; - lnet_handle_eq_t ln_ping_target_eq; - lnet_ping_info_t *ln_ping_info; - -#ifdef __KERNEL__ - int ln_rc_state; /* router checker startup/shutdown state */ - struct semaphore ln_rc_signal; /* serialise startup/shutdown */ - lnet_handle_eq_t ln_rc_eqh; /* router checker's event queue */ -#endif - -#ifdef LNET_USE_LIB_FREELIST - lnet_freelist_t ln_free_mes; - lnet_freelist_t ln_free_msgs; - lnet_freelist_t ln_free_mds; - lnet_freelist_t ln_free_eqs; -#endif - struct list_head ln_active_msgs; - struct list_head ln_active_mds; - struct list_head ln_active_eqs; - - lnet_counters_t ln_counters; - -#ifndef __KERNEL__ - /* Temporary workaround to allow uOSS and test programs force - * server mode in userspace. The only place where we use it is - * lnet_prepare(). 
The only way to turn this flag on is to - * call lnet_server_mode() */ - - int ln_server_mode_flag; -#endif -} lnet_t; - -#endif diff --git a/lnet/include/lnet/linux/.cvsignore b/lnet/include/lnet/linux/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/lnet/linux/Makefile.am b/lnet/include/lnet/linux/Makefile.am deleted file mode 100644 index 409e1593f24dea6b9689354d8d7c05ad1772302b..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h diff --git a/lnet/include/lnet/linux/api-support.h b/lnet/include/lnet/linux/api-support.h deleted file mode 100644 index bec6e34aa1e32fcb8f999cc9ced2e1a9028c9376..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/api-support.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __LINUX_API_SUPPORT_H__ -#define __LINUX_API_SUPPORT_H__ - -#ifndef __LNET_API_SUPPORT_H__ -#error Do not #include this file directly. 
#include <lnet /api-support.h> instead -#endif - -#ifndef __KERNEL__ -# include <stdio.h> -# include <stdlib.h> -# include <unistd.h> -# include <time.h> - -/* Lots of POSIX dependencies to support PtlEQWait_timeout */ -# include <signal.h> -# include <setjmp.h> -# include <time.h> - -#ifdef HAVE_LIBREADLINE -#define READLINE_LIBRARY -#include <readline/readline.h> - -/* readline.h pulls in a #define that conflicts with one in libcfs.h */ -#undef RETURN - -/* completion_matches() is #if 0-ed out in modern glibc */ -#ifndef completion_matches -# define completion_matches rl_completion_matches -#endif - -#endif /* HAVE_LIBREADLINE */ - -extern void using_history(void); -extern void stifle_history(int); -extern void add_history(char *); - -#endif /* !__KERNEL__ */ - -#endif diff --git a/lnet/include/lnet/linux/lib-lnet.h b/lnet/include/lnet/linux/lib-lnet.h deleted file mode 100644 index 9c38fd3ff226349807dd6d5b9552b56f85d4e71e..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/lib-lnet.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LIB_LNET_H__ -#define __LNET_LINUX_LIB_LNET_H__ - -#ifndef __LNET_LIB_LNET_H__ -#error Do not #include this file directly. #include <lnet/lib-lnet.h> instead -#endif - -#ifdef __KERNEL__ -# include <asm/page.h> -# include <linux/string.h> -# include <asm/io.h> -# include <libcfs/kp30.h> - -static inline __u64 -lnet_page2phys (struct page *p) -{ - /* compiler optimizer will elide unused branches */ - - switch (sizeof(typeof(page_to_phys(p)))) { - case 4: - /* page_to_phys returns a 32 bit physical address. This must - * be a 32 bit machine with <= 4G memory and we must ensure we - * don't sign extend when converting to 64 bits. 
*/ - return (unsigned long)page_to_phys(p); - - case 8: - /* page_to_phys returns a 64 bit physical address :) */ - return page_to_phys(p); - - default: - LBUG(); - return 0; - } -} - -#else /* __KERNEL__ */ -# include <libcfs/list.h> -# include <string.h> -# ifdef HAVE_LIBPTHREAD -# include <pthread.h> -# endif -#endif - -#define LNET_ROUTER - -#endif /* __LNET_LINUX_LIB_LNET_H__ */ diff --git a/lnet/include/lnet/linux/lib-types.h b/lnet/include/lnet/linux/lib-types.h deleted file mode 100644 index 7d28839d97b93387d6aaa34c18ee8e598135c1bb..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/lib-types.h +++ /dev/null @@ -1,20 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LIB_TYPES_H__ -#define __LNET_LINUX_LIB_TYPES_H__ - -#ifndef __LNET_LIB_TYPES_H__ -#error Do not #include this file directly. #include <lnet/lib-types.h> instead -#endif - -#ifdef __KERNEL__ -# include <linux/uio.h> -# include <linux/smp_lock.h> -# include <linux/types.h> -#else -# define LNET_USE_LIB_FREELIST -# include <sys/types.h> -#endif - -#endif diff --git a/lnet/include/lnet/linux/lnet.h b/lnet/include/lnet/linux/lnet.h deleted file mode 100644 index b1aab840c6828622cbb2c6dbdd8c111e9d57a471..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/lnet.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LNET_H__ -#define __LNET_LINUX_LNET_H__ - -#ifndef __LNET_H__ -#error Do not #include this file directly. 
#include <lnet/lnet.h> instead -#endif - -/* - * lnet.h - * - * User application interface file - */ - -#if defined (__KERNEL__) -#include <linux/uio.h> -#include <linux/types.h> -#else -#include <sys/types.h> -#include <sys/uio.h> -#endif - -#endif diff --git a/lnet/include/lnet/lnet.h b/lnet/include/lnet/lnet.h deleted file mode 100644 index 819c5241f7c926870dba28e137de36cb7efdb279..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lnet.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_H__ -#define __LNET_H__ - -/* - * lnet.h - * - * User application interface file - */ -#if defined(__linux__) -#include <lnet/linux/lnet.h> -#elif defined(__APPLE__) -#include <lnet/darwin/lnet.h> -#elif defined(__WINNT__) -#include <lnet/winnt/lnet.h> -#else -#error Unsupported Operating System -#endif - -#include <lnet/types.h> -#include <lnet/api.h> - -#endif diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h deleted file mode 100644 index 72b8e474eb9924742ae5906229cfb65df802d499..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lnetctl.h +++ /dev/null @@ -1,94 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * header for libptlctl.a - */ -#ifndef _PTLCTL_H_ -#define _PTLCTL_H_ - -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> - -#define LNET_DEV_ID 0 -#define LNET_DEV_PATH "/dev/lnet" -#define LNET_DEV_MAJOR 10 -#define LNET_DEV_MINOR 240 -#define OBD_DEV_ID 1 -#define OBD_DEV_PATH "/dev/obd" -#define OBD_DEV_MAJOR 10 -#define OBD_DEV_MINOR 241 -#define SMFS_DEV_ID 2 -#define SMFS_DEV_PATH "/dev/snapdev" -#define SMFS_DEV_MAJOR 10 -#define SMFS_DEV_MINOR 242 - -int ptl_initialize(int argc, char **argv); -int jt_ptl_network(int argc, char **argv); -int jt_ptl_list_nids(int argc, char **argv); -int jt_ptl_which_nid(int argc, char **argv); -int jt_ptl_print_interfaces(int argc, char **argv); -int jt_ptl_add_interface(int argc, char **argv); -int jt_ptl_del_interface(int argc, char **argv); -int jt_ptl_print_peers (int argc, char **argv); -int jt_ptl_add_peer (int argc, char **argv); -int jt_ptl_del_peer (int argc, char **argv); -int jt_ptl_print_connections (int argc, char **argv); -int jt_ptl_disconnect(int argc, char **argv); -int jt_ptl_push_connection(int argc, char **argv); -int jt_ptl_print_active_txs(int argc, char **argv); -int jt_ptl_ping(int argc, char **argv); -int jt_ptl_mynid(int argc, char **argv); -int jt_ptl_add_uuid(int argc, char **argv); -int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ -int jt_ptl_close_uuid(int argc, char **argv); -int jt_ptl_del_uuid(int argc, char **argv); -int jt_ptl_add_route (int argc, char **argv); -int jt_ptl_del_route (int argc, char **argv); -int jt_ptl_notify_router (int argc, char **argv); -int jt_ptl_print_routes (int argc, char **argv); -int jt_ptl_fail_nid (int argc, char **argv); -int jt_ptl_lwt(int argc, char **argv); -int jt_ptl_testprotocompat(int argc, char 
**argv); -int jt_ptl_memhog(int argc, char **argv); - -int dbg_initialize(int argc, char **argv); -int jt_dbg_filter(int argc, char **argv); -int jt_dbg_show(int argc, char **argv); -int jt_dbg_list(int argc, char **argv); -int jt_dbg_debug_kernel(int argc, char **argv); -int jt_dbg_debug_daemon(int argc, char **argv); -int jt_dbg_debug_file(int argc, char **argv); -int jt_dbg_clear_debug_buf(int argc, char **argv); -int jt_dbg_mark_debug_buf(int argc, char **argv); -int jt_dbg_modules(int argc, char **argv); -int jt_dbg_panic(int argc, char **argv); - -/* l_ioctl.c */ -typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf); -void set_ioc_handler(ioc_handler_t *handler); -int register_ioc_dev(int dev_id, const char * dev_name, int major, int minor); -void unregister_ioc_dev(int dev_id); -int set_ioctl_dump(char * file); -int l_ioctl(int dev_id, unsigned int opc, void *buf); -int parse_dump(char * dump_file, ioc_handler_t ioc_func); -int jt_ioc_dump(int argc, char **argv); -extern char *dump_filename; -int dump(int dev_id, unsigned int opc, void *buf); - -#endif diff --git a/lnet/include/lnet/lnetst.h b/lnet/include/lnet/lnetst.h deleted file mode 100644 index 511c4586053530a0dd88017c57ef7792ba9e71bf..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lnetst.h +++ /dev/null @@ -1,448 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - */ - -#ifndef __LNET_ST_H__ -#define __LNET_ST_H__ - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-types.h> - -#define LST_NAME_SIZE 32 /* max name buffer length */ - -#define LSTIO_DEBUG 0xC00 /* debug */ -#define LSTIO_SESSION_NEW 0xC01 /* create session */ -#define LSTIO_SESSION_END 0xC02 /* end session */ -#define LSTIO_SESSION_INFO 0xC03 /* query session */ -#define LSTIO_GROUP_ADD 0xC10 /* add group */ 
-#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */ -#define LSTIO_GROUP_INFO 0xC12 /* query defailt infomation of specified group */ -#define LSTIO_GROUP_DEL 0xC13 /* delete group */ -#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */ -#define LSTIO_GROUP_UPDATE 0xC15 /* update group */ -#define LSTIO_BATCH_ADD 0xC20 /* add batch */ -#define LSTIO_BATCH_START 0xC21 /* start batch */ -#define LSTIO_BATCH_STOP 0xC22 /* stop batch */ -#define LSTIO_BATCH_DEL 0xC23 /* delete batch */ -#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */ -#define LSTIO_BATCH_INFO 0xC25 /* show defail of specified batch */ -#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */ -#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */ -#define LSTIO_STAT_QUERY 0xC30 /* get stats */ - -typedef struct { - lnet_nid_t ses_nid; /* nid of console node */ - __u64 ses_stamp; /* time stamp */ -} lst_sid_t; /*** session id */ - -#define LST_INVALID_SID ((const lst_sid_t){.ses_nid = LNET_NID_ANY,\ - .ses_stamp = -1}) - -typedef struct { - __u64 bat_id; /* unique id in session */ -} lst_bid_t; /*** batch id (group of tests) */ - -/* Status of test node */ -#define LST_NODE_ACTIVE 0x1 /* node in this session */ -#define LST_NODE_BUSY 0x2 /* node is taken by other session */ -#define LST_NODE_DOWN 0x4 /* node is down */ -#define LST_NODE_UNKNOWN 0x8 /* node not in session */ - -typedef struct { - lnet_process_id_t nde_id; /* id of node */ - int nde_state; /* state of node */ -} lstcon_node_ent_t; /*** node entry, for list_group command */ - -typedef struct { - int nle_nnode; /* # of nodes */ - int nle_nactive; /* # of active nodes */ - int nle_nbusy; /* # of busy nodes */ - int nle_ndown; /* # of down nodes */ - int nle_nunknown; /* # of unknown nodes */ -} lstcon_ndlist_ent_t; /*** node_list entry, for list_batch command */ - -typedef struct { - int tse_type; /* test type */ - int tse_loop; /* loop count */ - int tse_concur; /* concurrency of test */ -} 
lstcon_test_ent_t; /*** test summary entry, for list_batch command */ - -typedef struct { - int bae_state; /* batch status */ - int bae_timeout; /* batch timeout */ - int bae_ntest; /* # of tests in the batch */ -} lstcon_batch_ent_t; /*** batch summary entry, for list_batch command */ - -typedef struct { - lstcon_ndlist_ent_t tbe_cli_nle; /* client (group) node_list entry */ - lstcon_ndlist_ent_t tbe_srv_nle; /* server (group) node_list entry */ - union { - lstcon_test_ent_t tbe_test; /* test entry */ - lstcon_batch_ent_t tbe_batch; /* batch entry */ - } u; -} lstcon_test_batch_ent_t; /*** test/batch verbose information entry, - *** for list_batch command */ - -typedef struct { - struct list_head rpe_link; /* link chain */ - lnet_process_id_t rpe_peer; /* peer's id */ - struct timeval rpe_stamp; /* time stamp of RPC */ - int rpe_state; /* peer's state */ - int rpe_rpc_errno; /* RPC errno */ - - lst_sid_t rpe_sid; /* peer's session id */ - int rpe_fwk_errno; /* framework errno */ - int rpe_priv[4]; /* private data */ - char rpe_payload[0]; /* private reply payload */ -} lstcon_rpc_ent_t; - -typedef struct { - int trs_rpc_stat[4]; /* RPCs stat (0: total, 1: failed, 2: finished, 4: reserved */ - int trs_rpc_errno; /* RPC errno */ - int trs_fwk_stat[8]; /* framework stat */ - int trs_fwk_errno; /* errno of the first remote error */ - void *trs_fwk_private; /* private framework stat */ -} lstcon_trans_stat_t; - -static inline int -lstcon_rpc_stat_total(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0]; -} - -static inline int -lstcon_rpc_stat_success(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1]; -} - -static inline int -lstcon_rpc_stat_failure(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2]; -} - -static inline int -lstcon_sesop_stat_success(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? 
++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_sesop_stat_failure(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_sesqry_stat_active(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_sesqry_stat_busy(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_sesqry_stat_unknown(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; -} - -static inline int -lstcon_tsbop_stat_success(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_tsbop_stat_failure(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_tsbqry_stat_idle(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_tsbqry_stat_run(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_tsbqry_stat_failure(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; -} - -static inline int -lstcon_statqry_stat_success(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_statqry_stat_failure(lstcon_trans_stat_t *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -/* create a session */ -typedef struct { - int lstio_ses_key; /* IN: local key */ - int lstio_ses_timeout; /* IN: session timeout */ - int lstio_ses_force; /* IN: force create ? 
*/ - lst_sid_t *lstio_ses_idp; /* OUT: session id */ - int lstio_ses_nmlen; /* IN: name length */ - char *lstio_ses_namep; /* IN: session name */ -} lstio_session_new_args_t; - -/* query current session */ -typedef struct { - lst_sid_t *lstio_ses_idp; /* OUT: session id */ - int *lstio_ses_keyp; /* OUT: local key */ - lstcon_ndlist_ent_t *lstio_ses_ndinfo; /* OUT: */ - int lstio_ses_nmlen; /* IN: name length */ - char *lstio_ses_namep; /* OUT: session name */ -} lstio_session_info_args_t; - -/* delete a session */ -typedef struct { - int lstio_ses_key; /* IN: session key */ -} lstio_session_end_args_t; - -#define LST_OPC_SESSION 1 -#define LST_OPC_GROUP 2 -#define LST_OPC_NODES 3 -#define LST_OPC_BATCHCLI 4 -#define LST_OPC_BATCHSRV 5 - -typedef struct { - int lstio_dbg_key; /* IN: session key */ - int lstio_dbg_type; /* IN: debug sessin|batch|group|nodes list */ - int lstio_dbg_flags; /* IN: reserved debug flags */ - int lstio_dbg_timeout; /* IN: timeout of debug */ - - int lstio_dbg_nmlen; /* IN: len of name */ - char *lstio_dbg_namep; /* IN: name of group|batch */ - int lstio_dbg_count; /* IN: # of test nodes to debug */ - lnet_process_id_t *lstio_dbg_idsp; /* IN: id of test nodes */ - struct list_head *lstio_dbg_resultp; /* OUT: list head of result buffer */ -} lstio_debug_args_t; - -typedef struct { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name length */ - char *lstio_grp_namep; /* IN: group name */ -} lstio_group_add_args_t; - -typedef struct { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name length */ - char *lstio_grp_namep; /* IN: group name */ -} lstio_group_del_args_t; - -#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */ -#define LST_GROUP_REFRESH 2 /* refresh inactive nodes in the group */ -#define LST_GROUP_RMND 3 /* delete nodes from the group */ - -typedef struct { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_opc; /* IN: OPC */ - int lstio_grp_args; /* IN: 
arguments */ - int lstio_grp_nmlen; /* IN: name length */ - char *lstio_grp_namep; /* IN: group name */ - int lstio_grp_count; /* IN: # of nodes id */ - lnet_process_id_t *lstio_grp_idsp; /* IN: array of nodes */ - struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */ -} lstio_group_update_args_t; - -typedef struct { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name length */ - char *lstio_grp_namep; /* IN: group name */ - int lstio_grp_count; /* IN: # of nodes */ - lnet_process_id_t *lstio_grp_idsp; /* IN: nodes */ - struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */ -} lstio_group_nodes_args_t; - -typedef struct { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_idx; /* IN: group idx */ - int lstio_grp_nmlen; /* IN: name len */ - char *lstio_grp_namep; /* OUT: name */ -} lstio_group_list_args_t; - -typedef struct { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name len */ - char *lstio_grp_namep; /* IN: name */ - lstcon_ndlist_ent_t *lstio_grp_entp; /* OUT: description of group */ - - int *lstio_grp_idxp; /* IN/OUT: node index */ - int *lstio_grp_ndentp; /* IN/OUT: # of nodent */ - lstcon_node_ent_t *lstio_grp_dentsp; /* OUT: nodent array */ -} lstio_group_info_args_t; - -#define LST_DEFAULT_BATCH "batch" /* default batch name */ - -typedef struct { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_nmlen; /* IN: name length */ - char *lstio_bat_namep; /* IN: batch name */ -} lstio_batch_add_args_t; - -typedef struct { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_nmlen; /* IN: name length */ - char *lstio_bat_namep; /* IN: batch name */ -} lstio_batch_del_args_t; - -typedef struct { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_timeout; /* IN: timeout for the batch */ - int lstio_bat_nmlen; /* IN: name length */ - char *lstio_bat_namep; /* IN: batch name */ - struct list_head *lstio_bat_resultp; /* OUT: list head of 
result buffer */ -} lstio_batch_run_args_t; - -typedef struct { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_force; /* IN: abort unfinished test RPC */ - int lstio_bat_nmlen; /* IN: name length */ - char *lstio_bat_namep; /* IN: batch name */ - struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ -} lstio_batch_stop_args_t; - -typedef struct { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_testidx; /* IN: test index */ - int lstio_bat_client; /* IN: is test client? */ - int lstio_bat_timeout; /* IN: timeout for waiting */ - int lstio_bat_nmlen; /* IN: name length */ - char *lstio_bat_namep; /* IN: batch name */ - struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ -} lstio_batch_query_args_t; - -typedef struct { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_idx; /* IN: index */ - int lstio_bat_nmlen; /* IN: name length */ - char *lstio_bat_namep; /* IN: batch name */ -} lstio_batch_list_args_t; - -typedef struct { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_nmlen; /* IN: name length */ - char *lstio_bat_namep; /* IN: name */ - int lstio_bat_server; /* IN: query server or not */ - int lstio_bat_testidx; /* IN: test index */ - lstcon_test_batch_ent_t *lstio_bat_entp; /* OUT: batch ent */ - - int *lstio_bat_idxp; /* IN/OUT: index of node */ - int *lstio_bat_ndentp; /* IN/OUT: # of nodent */ - lstcon_node_ent_t *lstio_bat_dentsp; /* array of nodent */ -} lstio_batch_info_args_t; - -/* add stat in session */ -typedef struct { - int lstio_sta_key; /* IN: session key */ - int lstio_sta_timeout; /* IN: timeout for stat requst */ - int lstio_sta_nmlen; /* IN: group name length */ - char *lstio_sta_namep; /* IN: group name */ - int lstio_sta_count; /* IN: # of pid */ - lnet_process_id_t *lstio_sta_idsp; /* IN: pid */ - struct list_head *lstio_sta_resultp; /* OUT: list head of result buffer */ -} lstio_stat_args_t; - -typedef enum { - LST_TEST_BULK = 1, - LST_TEST_PING = 2 -} 
lst_test_type_t; - -/* create a test in a batch */ -#define LST_MAX_CONCUR 1024 /* Max concurrency of test */ - -typedef struct { - int lstio_tes_key; /* IN: session key */ - int lstio_tes_bat_nmlen; /* IN: batch name len */ - char *lstio_tes_bat_name; /* IN: batch name */ - int lstio_tes_type; /* IN: test type */ - int lstio_tes_oneside; /* IN: one sided test */ - int lstio_tes_loop; /* IN: loop count */ - int lstio_tes_concur; /* IN: concurrency */ - - int lstio_tes_dist; /* IN: node distribution in destination groups */ - int lstio_tes_span; /* IN: node span in destination groups */ - int lstio_tes_sgrp_nmlen; /* IN: source group name length */ - char *lstio_tes_sgrp_name; /* IN: group name */ - int lstio_tes_dgrp_nmlen; /* IN: destination group name length */ - char *lstio_tes_dgrp_name; /* IN: group name */ - - int lstio_tes_param_len; /* IN: param buffer len */ - void *lstio_tes_param; /* IN: parameter for specified test: - lstio_bulk_param_t, - lstio_ping_param_t, - ... more */ - int *lstio_tes_retp; /* OUT: private returned value */ - struct list_head *lstio_tes_resultp; /* OUT: list head of result buffer */ -} lstio_test_args_t; - -typedef enum { - LST_BRW_READ = 1, - LST_BRW_WRITE = 2 -} lst_brw_type_t; - -typedef enum { - LST_BRW_CHECK_NONE = 1, - LST_BRW_CHECK_SIMPLE = 2, - LST_BRW_CHECK_FULL = 3 -} lst_brw_flags_t; - -typedef struct { - int blk_opc; /* bulk operation code */ - int blk_size; /* size (bytes) */ - int blk_time; /* time of running the test*/ - int blk_flags; /* reserved flags */ -} lst_test_bulk_param_t; - -typedef struct { - int png_size; /* size of ping message */ - int png_time; /* time */ - int png_loop; /* loop */ - int png_flags; /* reserved flags */ -} lst_test_ping_param_t; - -/* more tests */ - -typedef struct { - __u32 errors; - __u32 rpcs_sent; - __u32 rpcs_rcvd; - __u32 rpcs_dropped; - __u32 rpcs_expired; - __u64 bulk_get; - __u64 bulk_put; -} srpc_counters_t; - -typedef struct { - __u32 active_tests; - __u32 active_batches; - 
__u32 zombie_sessions; - __u32 brw_errors; - __u32 ping_errors; -} sfw_counters_t; - -#endif diff --git a/lnet/include/lnet/ptllnd.h b/lnet/include/lnet/ptllnd.h deleted file mode 100755 index 2c6263c2ac5df82795b341baa81377457d844396..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/ptllnd.h +++ /dev/null @@ -1,74 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -/* - * The PTLLND was designed to support Portals with - * Lustre and non-lustre UNLINK semantics. - * However for now the two targets are Cray Portals - * on the XT3 and Lustre Portals (for testing) both - * have Lustre UNLINK semantics, so this is defined - * by default. 
- */ -#define LUSTRE_PORTALS_UNLINK_SEMANTICS - - -#ifdef _USING_LUSTRE_PORTALS_ - -/* NIDs are 64-bits on Lustre Portals */ -#define FMT_NID LPU64 -#define FMT_PID "%d" - -/* When using Lustre Portals Lustre completion semantics are imlicit*/ -#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 - -#else /* _USING_CRAY_PORTALS_ */ - -/* NIDs are integers on Cray Portals */ -#define FMT_NID "%u" -#define FMT_PID "%d" - -/* When using Cray Portals this is defined in the Cray Portals Header*/ -/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */ - -/* Can compare handles directly on Cray Portals */ -#define PtlHandleIsEqual(a,b) ((a) == (b)) - -/* Diffrent error types on Cray Portals*/ -#define ptl_err_t ptl_ni_fail_t - -/* - * The Cray Portals has no maximum number of IOVs. The - * maximum is limited only my memory and size of the - * int parameters (2^31-1). - * Lustre only really require that the underyling - * implemenation to support at least LNET_MAX_IOV, - * so for Cray portals we can safely just use that - * value here. - * - */ -#define PTL_MD_MAX_IOV LNET_MAX_IOV - -#endif - -#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID - -/* Align incoming small request messages to an 8 byte boundary if this is - * supported to avoid alignment issues on some architectures */ -#ifndef PTL_MD_LOCAL_ALIGN8 -# define PTL_MD_LOCAL_ALIGN8 0 -#endif diff --git a/lnet/include/lnet/ptllnd_wire.h b/lnet/include/lnet/ptllnd_wire.h deleted file mode 100644 index ca9046c15f89e6fa4a6c794ee1ddcd2bb83dc9d6..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/ptllnd_wire.h +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. 
- * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -/* Minimum buffer size that any peer will post to receive ptllnd messages */ -#define PTLLND_MIN_BUFFER_SIZE 256 - -/************************************************************************ - * Tunable defaults that {u,k}lnds/ptllnd should have in common. - */ - -#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */ -#define PTLLND_PID 9 /* The Portals PID */ -#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */ - -/* Default buffer size for kernel ptllnds (guaranteed eager) */ -#define PTLLND_MAX_KLND_MSG_SIZE 512 - -/* Default buffer size for catamount ptllnds (not guaranteed eager) - large - * enough to avoid RDMA for anything sent while control is not in liblustre */ -#define PTLLND_MAX_ULND_MSG_SIZE 512 - - -/************************************************************************ - * Portals LND Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). 
- */ - -#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved - * above is for bulk data transfer */ -#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */ - -typedef struct -{ - lnet_hdr_t kptlim_hdr; /* portals header */ - char kptlim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kptl_immediate_msg_t; - -typedef struct -{ - lnet_hdr_t kptlrm_hdr; /* portals header */ - __u64 kptlrm_matchbits; /* matchbits */ -} WIRE_ATTR kptl_rdma_msg_t; - -typedef struct -{ - __u64 kptlhm_matchbits; /* matchbits */ - __u32 kptlhm_max_msg_size; /* max message size */ -} WIRE_ATTR kptl_hello_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ptlm_magic; /* I'm a Portals LND message */ - __u16 ptlm_version; /* this is my version number */ - __u8 ptlm_type; /* the message type */ - __u8 ptlm_credits; /* returned credits */ - __u32 ptlm_nob; /* # bytes in whole message */ - __u32 ptlm_cksum; /* checksum (0 == no checksum) */ - __u64 ptlm_srcnid; /* sender's NID */ - __u64 ptlm_srcstamp; /* sender's incarnation */ - __u64 ptlm_dstnid; /* destination's NID */ - __u64 ptlm_dststamp; /* destination's incarnation */ - __u32 ptlm_srcpid; /* sender's PID */ - __u32 ptlm_dstpid; /* destination's PID */ - - union { - kptl_immediate_msg_t immediate; - kptl_rdma_msg_t rdma; - kptl_hello_msg_t hello; - } WIRE_ATTR ptlm_u; - -} kptl_msg_t; - -#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC -#define PTLLND_MSG_VERSION 0x04 - -#define PTLLND_RDMA_OK 0x00 -#define PTLLND_RDMA_FAIL 0x01 - -#define PTLLND_MSG_TYPE_INVALID 0x00 -#define PTLLND_MSG_TYPE_PUT 0x01 -#define PTLLND_MSG_TYPE_GET 0x02 -#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/ -#define PTLLND_MSG_TYPE_NOOP 0x04 -#define PTLLND_MSG_TYPE_HELLO 0x05 -#define PTLLND_MSG_TYPE_NAK 0x06 - diff --git a/lnet/include/lnet/socklnd.h b/lnet/include/lnet/socklnd.h deleted file mode 100644 index fbeea157970b5328602c221889d29f6013ff6fb9..0000000000000000000000000000000000000000 
--- a/lnet/include/lnet/socklnd.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * <lnet/socklnd.h> - * - * #defines shared between socknal implementation and utilities - */ -#ifndef __LNET_LNET_SOCKLND_H__ -#define __LNET_LNET_SOCKLND_H__ - -#include <lnet/types.h> -#include <lnet/lib-types.h> - -#define SOCKLND_CONN_NONE (-1) -#define SOCKLND_CONN_ANY 0 -#define SOCKLND_CONN_CONTROL 1 -#define SOCKLND_CONN_BULK_IN 2 -#define SOCKLND_CONN_BULK_OUT 3 -#define SOCKLND_CONN_NTYPES 4 - -typedef struct { - __u32 kshm_magic; /* magic number of socklnd message */ - __u32 kshm_version; /* version of socklnd message */ - lnet_nid_t kshm_src_nid; /* sender's nid */ - lnet_nid_t kshm_dst_nid; /* destination nid */ - lnet_pid_t kshm_src_pid; /* sender's pid */ - lnet_pid_t kshm_dst_pid; /* destination pid */ - __u64 kshm_src_incarnation; /* sender's incarnation */ - __u64 kshm_dst_incarnation; /* destination's incarnation */ - __u32 kshm_ctype; /* connection type */ - __u32 kshm_nips; /* # IP addrs */ - __u32 kshm_ips[0]; /* IP addrs */ -} WIRE_ATTR ksock_hello_msg_t; - -typedef struct { - lnet_hdr_t ksnm_hdr; /* lnet hdr */ - char ksnm_payload[0];/* lnet payload */ -} WIRE_ATTR ksock_lnet_msg_t; - -typedef struct { - __u32 ksm_type; /* type of socklnd message */ - __u32 ksm_csum; /* checksum if != 0 */ - __u64 ksm_zc_req_cookie; /* ack required if != 0 */ - __u64 ksm_zc_ack_cookie; /* ack if != 0 */ - union { - ksock_lnet_msg_t lnetmsg; /* lnet message, it's empty if it's NOOP */ - } WIRE_ATTR ksm_u; -} WIRE_ATTR ksock_msg_t; - -#define KSOCK_MSG_NOOP 0xc0 /* ksm_u empty */ -#define KSOCK_MSG_LNET 0xc1 /* lnet msg */ - -/* We need to know this number to parse hello msg from ksocklnd in - * other LND (usocklnd, for example) */ -#define KSOCK_PROTO_V2 2 - -#endif diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h deleted file mode 100644 index 
d080c9690016f8d7f6e534931f72013e13958290..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/types.h +++ /dev/null @@ -1,168 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_TYPES_H__ -#define __LNET_TYPES_H__ - -#include <libcfs/libcfs.h> - -#define LNET_RESERVED_PORTAL 0 /* portals reserved for lnet's own use */ - -typedef __u64 lnet_nid_t; -typedef __u32 lnet_pid_t; - -#define LNET_NID_ANY ((lnet_nid_t) -1) -#define LNET_PID_ANY ((lnet_pid_t) -1) - -#ifdef CRAY_XT3 -typedef __u32 lnet_uid_t; -#define LNET_UID_ANY ((lnet_uid_t) -1) -#endif - -#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */ -#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ - -#define LNET_TIME_FOREVER (-1) - -typedef struct { - __u64 cookie; -} lnet_handle_any_t; - -typedef lnet_handle_any_t lnet_handle_eq_t; -typedef lnet_handle_any_t lnet_handle_md_t; -typedef lnet_handle_any_t lnet_handle_me_t; - -#define LNET_INVALID_HANDLE \ - ((const lnet_handle_any_t){.cookie = -1}) -#define LNET_EQ_NONE LNET_INVALID_HANDLE - -static inline int LNetHandleIsEqual (lnet_handle_any_t h1, lnet_handle_any_t h2) -{ - return (h1.cookie == h2.cookie); -} - -typedef struct { - lnet_nid_t nid; - lnet_pid_t pid; /* node id / process id */ -} lnet_process_id_t; - -typedef enum { - LNET_RETAIN = 0, - LNET_UNLINK -} lnet_unlink_t; - -typedef enum { - LNET_INS_BEFORE, - LNET_INS_AFTER -} lnet_ins_pos_t; - -typedef struct { - void *start; - unsigned int length; - int threshold; - int max_size; - unsigned int options; - void *user_ptr; - lnet_handle_eq_t eq_handle; -} lnet_md_t; - -/* Max Transfer Unit (minimum supported everywhere) */ -#define LNET_MTU_BITS 20 -#define LNET_MTU (1<<LNET_MTU_BITS) - -/* limit on the number of entries in discontiguous MDs */ -#define LNET_MAX_IOV 256 - -/* Max payload size */ -#ifndef LNET_MAX_PAYLOAD -# error "LNET_MAX_PAYLOAD must be defined in config.h" 
-#else -# if (LNET_MAX_PAYLOAD < LNET_MTU) -# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" -# elif defined(__KERNEL__) -# if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) -/* PAGE_SIZE is a constant: check with cpp! */ -# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" -# endif -# endif -#endif - -/* Options for the MD structure */ -#define LNET_MD_OP_PUT (1 << 0) -#define LNET_MD_OP_GET (1 << 1) -#define LNET_MD_MANAGE_REMOTE (1 << 2) -/* unused (1 << 3) */ -#define LNET_MD_TRUNCATE (1 << 4) -#define LNET_MD_ACK_DISABLE (1 << 5) -#define LNET_MD_IOVEC (1 << 6) -#define LNET_MD_MAX_SIZE (1 << 7) -#define LNET_MD_KIOV (1 << 8) - -/* For compatibility with Cray Portals */ -#define LNET_MD_PHYS 0 - -#define LNET_MD_THRESH_INF (-1) - -/* NB lustre portals uses struct iovec internally! */ -typedef struct iovec lnet_md_iovec_t; - -typedef struct { - cfs_page_t *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; -} lnet_kiov_t; - -typedef enum { - LNET_EVENT_GET, - LNET_EVENT_PUT, - LNET_EVENT_REPLY, - LNET_EVENT_ACK, - LNET_EVENT_SEND, - LNET_EVENT_UNLINK, -} lnet_event_kind_t; - -#define LNET_SEQ_BASETYPE long -typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t; -#define LNET_SEQ_GT(a,b) (((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0) - -/* XXX - * cygwin need the pragma line, not clear if it's needed in other places. - * checking!!! 
- */ -#ifdef __CYGWIN__ -#pragma pack(push, 4) -#endif -typedef struct { - lnet_process_id_t target; - lnet_process_id_t initiator; - lnet_nid_t sender; - lnet_event_kind_t type; - unsigned int pt_index; - __u64 match_bits; - unsigned int rlength; - unsigned int mlength; - lnet_handle_md_t md_handle; - lnet_md_t md; - __u64 hdr_data; - int status; - int unlinked; - unsigned int offset; -#ifdef CRAY_XT3 - lnet_uid_t uid; -#endif - - volatile lnet_seq_t sequence; -} lnet_event_t; -#ifdef __CYGWIN__ -#pragma pop -#endif - -typedef enum { - LNET_ACK_REQ, - LNET_NOACK_REQ -} lnet_ack_req_t; - -typedef void (*lnet_eq_handler_t)(lnet_event_t *event); -#define LNET_EQ_HANDLER_NONE NULL - -#endif diff --git a/lnet/include/lnet/winnt/api-support.h b/lnet/include/lnet/winnt/api-support.h deleted file mode 100644 index 8806981b1a173454b2c2496281952279f9e79d61..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/api-support.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __WINNT_API_SUPPORT_H__ -#define __WINNT_API_SUPPORT_H__ - -#ifndef __LNET_API_SUPPORT_H__ -#error Do not #include this file directly. #include <lnet/api-support.h> instead -#endif - - -#endif diff --git a/lnet/include/lnet/winnt/lib-lnet.h b/lnet/include/lnet/winnt/lib-lnet.h deleted file mode 100644 index bb3e5af83089f30309af97020472eebf600a611e..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/lib-lnet.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_WINNT_LIB_LNET_H__ -#define __LNET_WINNT_LIB_LNET_H__ - -#ifndef __LNET_LIB_LNET_H__ -#error Do not #include this file directly. 
#include <lnet/lib-lnet.h> instead -#endif - -#ifdef __KERNEL__ -# include <libcfs/libcfs.h> -# include <libcfs/kp30.h> - -static inline __u64 -lnet_page2phys (struct page *p) -{ - return 0; -} - -#else /* __KERNEL__ */ - -#endif - -#endif /* __LNET_WINNT_LIB_LNET_H__ */ diff --git a/lnet/include/lnet/winnt/lib-types.h b/lnet/include/lnet/winnt/lib-types.h deleted file mode 100644 index 33a31341286598ee95e2fce7ff970a6119849639..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/lib-types.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __LNET_WINNT_LIB_TYPES_H__ -#define __LNET_WINNT_LIB_TYPES_H__ - -#ifndef __LNET_LIB_TYPES_H__ -#error Do not #include this file directly. 
#include <lnet/lib-types.h> instead -#endif - -#include <libcfs/libcfs.h> - -typedef struct { - spinlock_t lock; -} lib_ni_lock_t; - -static inline void lib_ni_lock_init(lib_ni_lock_t *l) -{ - spin_lock_init(&l->lock); -} - -static inline void lib_ni_lock_fini(lib_ni_lock_t *l) -{} - -static inline void lib_ni_lock(lib_ni_lock_t *l) -{ - int flags; - spin_lock_irqsave(&l->lock, flags); -} - -static inline void lib_ni_unlock(lib_ni_lock_t *l) -{ - spin_unlock_irqrestore(&l->lock, 0); -} - -#endif diff --git a/lnet/include/lnet/winnt/lnet.h b/lnet/include/lnet/winnt/lnet.h deleted file mode 100644 index 7a3d24db2ce8bcd1106ca8e4a15f3e05865febeb..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/lnet.h +++ /dev/null @@ -1,511 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LNET_H__ -#define __LNET_LINUX_LNET_H__ - -#ifndef __LNET_H__ -#error Do not #include this file directly. #include <lnet/lnet.h> instead -#endif - -#ifdef __KERNEL__ - -#include <libcfs/libcfs.h> -#include <lnet/lib-lnet.h> - -/* - * tdilnd routines - */ - - -PUCHAR -KsNtStatusToString (IN NTSTATUS Status); - - -VOID -KsPrintf( - IN LONG DebugPrintLevel, - IN PCHAR DebugMessage, - IN ... 
- ); - - -ksock_mdl_t * -ks_lock_iovs( - IN struct iovec *iov, - IN int niov, - IN int recv, - IN int * len - ); - -ksock_mdl_t * -ks_lock_kiovs( - IN lnet_kiov_t * kiov, - IN int nkiov, - IN int recv, - IN int * len - ); - -int -ks_send_mdl( - ksock_tconn_t * tconn, - void * tx, - ksock_mdl_t * mdl, - int len, - int flags - ); - -int -ks_query_data( - ksock_tconn_t * tconn, - size_t * size, - int bIsExpedited); - -int -ks_recv_mdl( - ksock_tconn_t * tconn, - ksock_mdl_t * mdl, - int size, - int flags - ); - -int -ks_get_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - PULONG Length - ); - -NTSTATUS -ks_set_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - ULONG Length - ); - -int -ks_bind_tconn ( - ksock_tconn_t * tconn, - ksock_tconn_t * parent, - ulong_ptr addr, - unsigned short port - ); - -int -ks_build_tconn( - ksock_tconn_t * tconn, - ulong_ptr addr, - unsigned short port - ); - -int -ks_disconnect_tconn( - ksock_tconn_t * tconn, - ulong_ptr flags - ); - -void -ks_abort_tconn( - ksock_tconn_t * tconn - ); - -int -ks_query_local_ipaddr( - ksock_tconn_t * tconn - ); - -int -ks_tconn_write (ksock_tconn_t *tconn, void *buffer, int nob); - -int -ks_tconn_read (ksock_tconn_t * tconn, void *buffer, int nob); - -NTSTATUS -KsTcpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ); - -NTSTATUS -KsDisconectCompletionRoutine ( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ); - -NTSTATUS -KsTcpReceiveCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ); - -NTSTATUS -KsTcpSendCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ); - -NTSTATUS -KsAcceptCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ); - - -NTSTATUS -KsConnectEventHandler( - IN PVOID TdiEventContext, - IN LONG RemoteAddressLength, - IN PVOID RemoteAddress, - IN LONG UserDataLength, - IN PVOID UserData, - IN LONG 
OptionsLength, - IN PVOID Options, - OUT CONNECTION_CONTEXT * ConnectionContext, - OUT PIRP * AcceptIrp - ); - -NTSTATUS -KsDisconnectEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN LONG DisconnectDataLength, - IN PVOID DisconnectData, - IN LONG DisconnectInformationLength, - IN PVOID DisconnectInformation, - IN ULONG DisconnectFlags - ); - -NTSTATUS -KsTcpReceiveEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ); - -NTSTATUS -KsTcpReceiveExpeditedEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ); - -NTSTATUS -KsTcpChainedReceiveEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ); - -NTSTATUS -KsTcpChainedReceiveExpeditedEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ); - - - -VOID -KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem); - - -ULONG -ks_tdi_send_flags(ULONG SockFlags); - -PIRP -KsBuildTdiIrp( - IN PDEVICE_OBJECT DeviceObject - ); - -NTSTATUS -KsSubmitTdiIrp( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN BOOLEAN bSynchronous, - OUT PULONG Information - ); - -NTSTATUS 
-KsOpenControl( - IN PUNICODE_STRING DeviceName, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ); - -NTSTATUS -KsCloseControl( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ); - -NTSTATUS -KsOpenAddress( - IN PUNICODE_STRING DeviceName, - IN PTRANSPORT_ADDRESS pAddress, - IN ULONG AddressLength, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ); - -NTSTATUS -KsCloseAddress( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ); - -NTSTATUS -KsOpenConnection( - IN PUNICODE_STRING DeviceName, - IN CONNECTION_CONTEXT ConnectionContext, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ); - -NTSTATUS -KsCloseConnection( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ); - -NTSTATUS -KsAssociateAddress( - IN HANDLE AddressHandle, - IN PFILE_OBJECT ConnectionObject - ); - - -NTSTATUS -KsDisassociateAddress( - IN PFILE_OBJECT ConnectionObject - ); - - -NTSTATUS -KsSetEventHandlers( - IN PFILE_OBJECT AddressObject, - IN PVOID EventContext, - IN PKS_EVENT_HANDLERS Handlers - ); - - -NTSTATUS -KsQueryProviderInfo( - PWSTR TdiDeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ); - -NTSTATUS -KsQueryAddressInfo( - IN PFILE_OBJECT FileObject, - OUT PTDI_ADDRESS_INFO AddressInfo, - OUT PULONG AddressSize - ); - -NTSTATUS -KsQueryConnectionInfo( - IN PFILE_OBJECT ConnectionObject, - OUT PTDI_CONNECTION_INFO ConnectionInfo, - OUT PULONG ConnectionSize - ); - -ULONG -KsInitializeTdiAddress( - IN OUT PTA_IP_ADDRESS pTransportAddress, - IN ULONG IpAddress, - IN USHORT IpPort - ); - -ULONG -KsQueryMdlsSize (IN PMDL Mdl); - - -ULONG -KsQueryTdiAddressLength( - OUT PTRANSPORT_ADDRESS pTransportAddress - ); - -NTSTATUS -KsQueryIpAddress( - IN PFILE_OBJECT FileObject, - OUT PVOID TdiAddress, - OUT ULONG* AddressLength - ); - - -NTSTATUS -KsErrorEventHandler( - IN PVOID TdiEventContext, - IN NTSTATUS Status - ); - -int -ks_set_handlers( - ksock_tconn_t * tconn - ); - - -VOID -KsPrintProviderInfo( - PWSTR DeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ); - 
-ksock_tconn_t * -ks_create_tconn(); - -void -ks_free_tconn( - ksock_tconn_t * tconn - ); - -void -ks_init_listener( - ksock_tconn_t * tconn - ); - -void -ks_init_sender( - ksock_tconn_t * tconn - ); - -void -ks_init_child( - ksock_tconn_t * tconn - ); - -void -ks_get_tconn( - ksock_tconn_t * tconn - ); - -void -ks_put_tconn( - ksock_tconn_t * tconn - ); - -int -ks_reset_handlers( - ksock_tconn_t * tconn - ); - -void -ks_destroy_tconn( - ksock_tconn_t * tconn - ); - - -PKS_TSDU -KsAllocateKsTsdu(); - -VOID -KsPutKsTsdu( - PKS_TSDU KsTsdu - ); - -VOID -KsFreeKsTsdu( - PKS_TSDU KsTsdu - ); - -VOID -KsInitializeKsTsdu( - PKS_TSDU KsTsdu, - ULONG Length - ); - - -VOID -KsInitializeKsTsduMgr( - PKS_TSDUMGR TsduMgr - ); - -VOID -KsInitializeKsChain( - PKS_CHAIN KsChain - ); - -NTSTATUS -KsCleanupTsduMgr( - PKS_TSDUMGR KsTsduMgr - ); - -NTSTATUS -KsCleanupKsChain( - PKS_CHAIN KsChain - ); - -NTSTATUS -KsCleanupTsdu( - ksock_tconn_t * tconn - ); - -NTSTATUS -KsCopyMdlChainToMdlChain( - IN PMDL SourceMdlChain, - IN ULONG SourceOffset, - IN PMDL DestinationMdlChain, - IN ULONG DestinationOffset, - IN ULONG BytesTobecopied, - OUT PULONG BytesCopied - ); - -ULONG -KsQueryMdlsSize (PMDL Mdl); - -NTSTATUS -KsLockUserBuffer ( - IN PVOID UserBuffer, - IN BOOLEAN bPaged, - IN ULONG Length, - IN LOCK_OPERATION Operation, - OUT PMDL * pMdl - ); - -PVOID -KsMapMdlBuffer (PMDL Mdl); - -VOID -KsReleaseMdl ( IN PMDL Mdl, - IN int Paged ); - -int -ks_lock_buffer ( - void * buffer, - int paged, - int length, - LOCK_OPERATION access, - ksock_mdl_t ** kmdl - ); - -void * -ks_map_mdl (ksock_mdl_t * mdl); - -void -ks_release_mdl (ksock_mdl_t *mdl, int paged); - -#endif /* __KERNEL__ */ - -#endif diff --git a/lnet/klnds/.cvsignore b/lnet/klnds/.cvsignore deleted file mode 100644 index f5fd0b02c2417a69331ad6a19e2e5d033590cc47..0000000000000000000000000000000000000000 --- a/lnet/klnds/.cvsignore +++ /dev/null @@ -1,5 +0,0 @@ -Makefile -autoMakefile -autoMakefile.in -.*.cmd -.depend diff --git 
a/lnet/klnds/Makefile.in b/lnet/klnds/Makefile.in deleted file mode 100644 index d4e034cd46164cbba9c9904c2ead1dbd0e813c59..0000000000000000000000000000000000000000 --- a/lnet/klnds/Makefile.in +++ /dev/null @@ -1,13 +0,0 @@ -@BUILD_GMLND_TRUE@subdir-m += gmlnd -@BUILD_MXLND_TRUE@subdir-m += mxlnd -@BUILD_RALND_TRUE@subdir-m += ralnd -@BUILD_O2IBLND_TRUE@subdir-m += o2iblnd -@BUILD_OPENIBLND_TRUE@subdir-m += openiblnd -@BUILD_CIBLND_TRUE@subdir-m += ciblnd -@BUILD_IIBLND_TRUE@subdir-m += iiblnd -@BUILD_VIBLND_TRUE@subdir-m += viblnd -@BUILD_QSWLND_TRUE@subdir-m += qswlnd -@BUILD_PTLLND_TRUE@subdir-m += ptllnd -subdir-m += socklnd - -@INCLUDE_RULES@ diff --git a/lnet/klnds/autoMakefile.am b/lnet/klnds/autoMakefile.am deleted file mode 100644 index e6d0146e6527ad1da7da2785c0b193e7dffd9377..0000000000000000000000000000000000000000 --- a/lnet/klnds/autoMakefile.am +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -SUBDIRS = socklnd qswlnd gmlnd mxlnd openiblnd iiblnd viblnd ralnd ptllnd ciblnd o2iblnd diff --git a/lnet/klnds/ciblnd/.cvsignore b/lnet/klnds/ciblnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/ciblnd/Makefile.in b/lnet/klnds/ciblnd/Makefile.in deleted file mode 100644 index 55311ad06da9fffb43246e5996d96c518693f6e2..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/Makefile.in +++ /dev/null @@ -1,8 +0,0 @@ -MODULES := kciblnd -kciblnd-objs := ciblnd.o ciblnd_cb.o ciblnd_modparams.o - -default: all - -EXTRA_POST_CFLAGS := @CIBCPPFLAGS@ -I@LUSTRE@/../lnet/klnds/openiblnd - -@INCLUDE_RULES@ diff --git a/lnet/klnds/ciblnd/autoMakefile.am b/lnet/klnds/ciblnd/autoMakefile.am deleted file mode 100644 index cae5cfc66c8bc6ce4c174455b7369ba12ec29f11..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/autoMakefile.am +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_CIBLND -modulenet_DATA = kciblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kciblnd-objs:%.o=%.c) - diff --git a/lnet/klnds/ciblnd/ciblnd.c b/lnet/klnds/ciblnd/ciblnd.c deleted file mode 100644 index e13948441b6c13ef4848bfa87c9731d72b4155cc..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/ciblnd.c +++ /dev/null @@ -1 +0,0 @@ -#include "openiblnd.c" diff --git a/lnet/klnds/ciblnd/ciblnd_cb.c b/lnet/klnds/ciblnd/ciblnd_cb.c deleted file mode 100644 index 893e16d655aed13d3cff9eaf79822b0f4689ffe7..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/ciblnd_cb.c +++ /dev/null @@ -1 +0,0 @@ -#include "openiblnd_cb.c" diff --git a/lnet/klnds/ciblnd/ciblnd_modparams.c b/lnet/klnds/ciblnd/ciblnd_modparams.c deleted file mode 100644 index a0c6b1fab7322550e2f928d752ba43c47214a55a..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/ciblnd_modparams.c +++ /dev/null @@ -1 +0,0 @@ -#include "openiblnd_modparams.c" diff --git a/lnet/klnds/gmlnd/.cvsignore b/lnet/klnds/gmlnd/.cvsignore deleted file mode 100644 index 642e2e6cc0e58fd056bd3c99bd1fa72521b9e8b7..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.cmd -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/gmlnd/Makefile.in b/lnet/klnds/gmlnd/Makefile.in deleted file mode 100644 index 1aec50d5f08f281a6da9941e19c0df0474859767..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kgmlnd -kgmlnd-objs := gmlnd_api.o gmlnd_cb.o gmlnd_comm.o gmlnd_utils.o gmlnd_module.o - -EXTRA_PRE_CFLAGS := @GMCPPFLAGS@ -DGM_KERNEL - -@INCLUDE_RULES@ diff --git a/lnet/klnds/gmlnd/README b/lnet/klnds/gmlnd/README deleted file mode 100644 index 
ac2e23ddb7c8b1671b60ea3c84eb26c5deebaa4e..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/README +++ /dev/null @@ -1,73 +0,0 @@ -1. This version of the GM nal requires an unreleased extension to the GM API to - map physical memory: gm_register_memory_ex_phys(). This allows it to avoid - ENOMEM problems associated with large contiguous buffer allocation. - -2. ./configure --with-gm=<path-to-gm-source-tree> \ - [--with-gm-install=<path-to-gm-installation>] - - If the sources do not support gm_register_memory_ex_phys(), configure flags - an error. In this case you should apply the patch and rebuild and re-install - GM as directed in the error message. - - By default GM is installed in /opt/gm. If an alternate path was specified to - <GM-sources>/binary/GM_INSTALL, you should also specify --with-gm-install - with the same path. - -3. The GM timeout is 300 seconds; i.e. the network may not release resources - claimed by communications stalled with a crashing node for this time. - Default gmnal buffer tuning parameters (see (4) below) have been chosen to - minimize this problem and prevent lustre having to block for resources. - However in some situations, where all network buffers are busy, the default - lustre timeout (various, scaled from the base timeout of 100 seconds) may be - too small and the only solution may be to increase the lustre timeout - dramatically. - -4. The gmnal has the following module parameters... - - gmnal_port The GM port that the NAL will use (default 4) - Change this if it conflicts with site usage. - - gmnal_ntx The number of "normal" transmit descriptors (default - 32). When this pool is exhausted, threads sending - and receiving on the network block until in-progress - transmits have completed. Each descriptor consumes 1 - GM_MTU sized buffer. - - gmnal_ntx_nblk The number of "reserved" transmit descriptors - (default 256). This pool is reserved for responses to - incoming communications that may not block. 
Increase - only if console error messages indicates the pool - has been exhausted (LustreError: Can't get tx for - msg type...) Each descriptor consumes 1 GM_MTU sized - buffer. - - gmnal_nlarge_tx_bufs The number of 1MByte transmit buffers to reserve at - startup (default 32). This controls the number of - concurrent sends larger that GM_MTU. It can be - reduced to conserve memory, or increased to increase - large message sending concurrency. - - gmnal_nrx_small The number of GM_MTU sized receive buffers posted to - receive from the network (default 128). Increase if - congestion is suspected, however note that the total - number of receives that can be posted at any time is - limited by the number of GM receive tokens - available. If there are too few, this, and - gmnal_nrx_large are scaled back accordingly. - - gmnal_nrx_large The number of 1MByte receive buffers posted to - receive from the network (default 64). Increase if - the number of OST threads is increased. But note - that the total number of receives that can be posted - at any time is limited by the number of GM receive - tokens available. If there are too few, this, and - gmnal_nrx_small are scaled back accordingly. - -5. Network configuration for GM is done in an lmc script as follows... - - GM2NID=${path-to-lustre-tree}/portals/utils/gmnalnid - - ${LMC} --node some_server --add net --nettype gm --nid `$GM2NID -n some_server` - - ${LMC} --node client --add net --nettype gm --nid '*' - diff --git a/lnet/klnds/gmlnd/autoMakefile.am b/lnet/klnds/gmlnd/autoMakefile.am deleted file mode 100644 index 6ff7933633ee7a00f2a5f252ec0c0c7ba48f31fb..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_GMLND -modulenet_DATA = kgmlnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kgmlnd-objs:%.o=%.c) gmlnd.h diff --git a/lnet/klnds/gmlnd/gm-reg-phys.patch b/lnet/klnds/gmlnd/gm-reg-phys.patch deleted file mode 100644 index df32a219361c34d3ecbe8126e962c63465595057..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gm-reg-phys.patch +++ /dev/null @@ -1,107 +0,0 @@ -Index: libgm/gm_register.c -=================================================================== -RCS file: /repository/gm/libgm/gm_register.c,v -retrieving revision 1.9.16.3 -diff -u -r1.9.16.3 gm_register.c ---- libgm/gm_register.c 9 Aug 2005 14:37:02 -0000 1.9.16.3 -+++ libgm/gm_register.c 25 Aug 2005 21:35:58 -0000 -@@ -77,20 +77,14 @@ - - */ - --GM_ENTRY_POINT --gm_status_t --gm_register_memory_ex (gm_port_t *p, void *_ptr, gm_size_t length, void *_pvma) -+static gm_status_t -+_gm_register_memory (gm_port_t *p, int is_physical, gm_u64_t ptr, gm_size_t length, gm_up_t pvma) - { - gm_status_t status; -- gm_up_t ptr; -- gm_up_t pvma; - - GM_CALLED_WITH_ARGS (("%p,%p,"GM_U64_TMPL",%p", - p, _ptr, GM_U64_ARG (length), _pvma)); - -- ptr = GM_PTR_TO_UP (_ptr); -- pvma = GM_PTR_TO_UP (_pvma); -- - #if !GM_KERNEL && !GM_CAN_REGISTER_MEMORY - GM_PARAMETER_MAY_BE_UNUSED (p); - GM_PARAMETER_MAY_BE_UNUSED (ptr); -@@ -160,7 +154,7 @@ - status = gm_add_mapping_to_page_table (ps, - ptr + offset, - pvma + offset, -- GM_INVALID_DMA_PAGE); -+ is_physical ? 
ptr + offset : GM_INVALID_DMA_PAGE); - if (status != GM_SUCCESS) - { - status = GM_INVALID_PARAMETER; -@@ -317,13 +311,31 @@ - - */ - -+#if GM_KERNEL && (GM_CPU_x86 || GM_CPU_x86_64 || GM_CPU_ia64) -+/* only architecture where pci bus addr == physical address can use -+ such a simple scheme */ -+GM_ENTRY_POINT gm_status_t -+gm_register_memory_ex_phys (struct gm_port *p, -+ gm_u64_t phys, gm_size_t length, -+ gm_up_t pvma) -+{ -+ return _gm_register_memory(p, 1, phys, length, (gm_size_t)pvma); -+} -+#endif -+ -+GM_ENTRY_POINT gm_status_t -+gm_register_memory_ex (gm_port_t *p, void *ptr, gm_size_t length, void *pvma) -+{ -+ return _gm_register_memory(p, 0, (gm_size_t)ptr, length, (gm_size_t)pvma); -+} -+ - GM_ENTRY_POINT gm_status_t - gm_register_memory (gm_port_t *p, void *ptr, gm_size_t length) - { - gm_status_t status; - - GM_CALLED_WITH_ARGS (("%p,%p,"GM_U64_TMPL, p, ptr, GM_U64_ARG (length))); -- status = gm_register_memory_ex (p, ptr, length, ptr); -+ status = _gm_register_memory(p, 0, (gm_size_t)ptr, length, (gm_size_t)ptr); - GM_RETURN_STATUS (status); - } - -Index: include/gm.h -=================================================================== -RCS file: /repository/gm/include/gm.h,v -retrieving revision 1.25.10.11 -diff -u -r1.25.10.11 gm.h ---- include/gm.h 14 Mar 2005 21:42:41 -0000 1.25.10.11 -+++ include/gm.h 25 Aug 2005 21:35:58 -0000 -@@ -2676,6 +2676,10 @@ - GM_ENTRY_POINT gm_status_t gm_register_memory_ex (struct gm_port *p, - void *ptr, gm_size_t length, - void *pvma); -+ -+GM_ENTRY_POINT gm_status_t gm_register_memory_ex_phys (struct gm_port *p, -+ gm_u64_t phys, gm_size_t length, -+ gm_up_t pvma); - #endif /* GM_API_VERSION >= GM_API_VERSION_2_0_6 */ - - #if GM_API_VERSION >= GM_API_VERSION_2_1_0 -Index: libgm/gm_reference_api.c -=================================================================== -RCS file: /repository/gm/libgm/gm_reference_api.c,v -retrieving revision 1.3.14.1 -diff -u -r1.3.14.1 gm_reference_api.c ---- 
libgm/gm_reference_api.c 23 Apr 2004 20:27:29 -0000 1.3.14.1 -+++ libgm/gm_reference_api.c 25 Aug 2005 22:39:20 -0000 -@@ -154,6 +154,9 @@ - GM_REF (gm_register_buffer); - GM_REF (gm_register_memory); - GM_REF (gm_register_memory_ex); -+#if GM_KERNEL && (GM_CPU_x86 || GM_CPU_x86_64 || GM_CPU_ia64) -+GM_REF (gm_register_memory_ex_phys); -+#endif - GM_REF (gm_resume_sending); - GM_REF (gm_send); - GM_REF (gm_send_to_peer); diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h deleted file mode 100644 index b47fd8fd098d1c24b1210cfdb4837a569e0ce8aa..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd.h +++ /dev/null @@ -1,248 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - - -/* - * Portals GM kernel NAL header file - * This file makes all declaration and prototypes - * for the API side and CB side of the NAL - */ -#ifndef __INCLUDE_GMNAL_H__ -#define __INCLUDE_GMNAL_H__ - -/* XXX Lustre as of V1.2.2 drop defines VERSION, which causes problems - * when including <GM>/include/gm_lanai.h which defines a structure field - * with the name VERSION XXX */ -#ifdef VERSION -# undef VERSION -#endif - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include "linux/module.h" -#include "linux/tty.h" -#include "linux/kernel.h" -#include "linux/mm.h" -#include "linux/string.h" -#include "linux/stat.h" -#include "linux/errno.h" -#include "linux/version.h" -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#include "linux/buffer_head.h" -#include "linux/fs.h" -#else -#include "linux/locks.h" -#endif -#include "linux/unistd.h" -#include "linux/init.h" -#include "linux/sem.h" -#include "linux/vmalloc.h" -#include "linux/sysctl.h" - -#define DEBUG_SUBSYSTEM S_LND - -#include "libcfs/kp30.h" -#include "lnet/lnet.h" -#include "lnet/lib-lnet.h" - -/* undefine these before including the GM headers which clash */ -#undef PACKAGE_BUGREPORT -#undef PACKAGE_NAME -#undef PACKAGE_STRING -#undef PACKAGE_TARNAME -#undef PACKAGE_VERSION - -#define GM_STRONG_TYPES 1 -#ifdef VERSION -#undef VERSION -#endif -#include "gm.h" -#include "gm_internal.h" - -/* Fixed tunables */ -#define GMNAL_RESCHED 100 /* # busy loops to force scheduler to yield */ -#define GMNAL_NETADDR_BASE 0x10000000 /* where we start in network VM */ -#define GMNAL_LARGE_PRIORITY GM_LOW_PRIORITY /* large message GM priority */ -#define GMNAL_SMALL_PRIORITY GM_LOW_PRIORITY /* small message GM priority */ - -/* Wire protocol */ -typedef struct { - lnet_hdr_t gmim_hdr; /* portals header */ - char gmim_payload[0]; /* payload */ -} gmnal_immediate_msg_t; - -typedef struct { - /* First 2 fields fixed FOR ALL TIME */ - __u32 
gmm_magic; /* I'm a GM message */ - __u16 gmm_version; /* this is my version number */ - - __u16 gmm_type; /* msg type */ - __u64 gmm_srcnid; /* sender's NID */ - __u64 gmm_dstnid; /* destination's NID */ - union { - gmnal_immediate_msg_t immediate; - } gmm_u; -} WIRE_ATTR gmnal_msg_t; - -#define GMNAL_MSG_MAGIC LNET_PROTO_GM_MAGIC -#define GMNAL_MSG_VERSION 1 -#define GMNAL_MSG_IMMEDIATE 1 - -typedef struct netbuf { - __u64 nb_netaddr; /* network VM address */ - lnet_kiov_t nb_kiov[1]; /* the pages (at least 1) */ -} gmnal_netbuf_t; - -#define GMNAL_NETBUF_MSG(nb) ((gmnal_msg_t *)page_address((nb)->nb_kiov[0].kiov_page)) -#define GMNAL_NETBUF_LOCAL_NETADDR(nb) ((void *)((unsigned long)(nb)->nb_netaddr)) - -typedef struct gmnal_txbuf { - struct list_head txb_list; /* queue on gmni_idle_ltxbs */ - struct gmnal_txbuf *txb_next; /* stash on gmni_ltxs */ - gmnal_netbuf_t txb_buf; /* space */ -} gmnal_txbuf_t; - -typedef struct gmnal_tx { - struct list_head tx_list; /* queue */ - int tx_credit:1; /* consumed a credit? */ - int tx_large_iskiov:1; /* large is in kiovs? */ - struct gmnal_ni *tx_gmni; /* owning NI */ - lnet_nid_t tx_nid; /* destination NID */ - int tx_gmlid; /* destination GM local ID */ - lnet_msg_t *tx_lntmsg; /* lntmsg to finalize on completion */ - - gmnal_netbuf_t tx_buf; /* small tx buffer */ - gmnal_txbuf_t *tx_ltxb; /* large buffer (to free on completion) */ - int tx_msgnob; /* message size (so far) */ - - int tx_large_nob; /* # bytes large buffer payload */ - int tx_large_offset; /* offset within frags */ - int tx_large_niov; /* # VM frags */ - union { - struct iovec *iov; /* mapped frags */ - lnet_kiov_t *kiov; /* page frags */ - } tx_large_frags; - cfs_time_t tx_launchtime; /* when (in jiffies) the - * transmit was launched */ - struct gmnal_tx *tx_next; /* stash on gmni_txs */ -} gmnal_tx_t; - -typedef struct gmnal_rx { - struct list_head rx_list; /* enqueue on gmni_rxq for handling */ - int rx_islarge:1; /* large receive buffer? 
*/ - unsigned int rx_recv_nob; /* bytes received */ - __u16 rx_recv_gmid; /* sender */ - __u8 rx_recv_port; /* sender's port */ - __u8 rx_recv_type; /* ?? */ - struct gmnal_rx *rx_next; /* stash on gmni_rxs */ - gmnal_netbuf_t rx_buf; /* the buffer */ -} gmnal_rx_t; - -typedef struct gmnal_ni { - lnet_ni_t *gmni_ni; /* generic NI */ - struct gm_port *gmni_port; /* GM port */ - spinlock_t gmni_gm_lock; /* serialise GM calls */ - int gmni_large_pages; /* # pages in a large message buffer */ - int gmni_large_msgsize; /* nob in large message buffers */ - int gmni_large_gmsize; /* large message GM bucket */ - int gmni_small_msgsize; /* nob in small message buffers */ - int gmni_small_gmsize; /* small message GM bucket */ - __u64 gmni_netaddr_base; /* base of mapped network VM */ - int gmni_netaddr_size; /* # bytes of mapped network VM */ - - gmnal_tx_t *gmni_txs; /* all txs */ - gmnal_rx_t *gmni_rxs; /* all rx descs */ - gmnal_txbuf_t *gmni_ltxbs; /* all large tx bufs */ - - atomic_t gmni_nthreads; /* total # threads */ - gm_alarm_t gmni_alarm; /* alarm to wake caretaker */ - int gmni_shutdown; /* tell all threads to exit */ - - struct list_head gmni_idle_txs; /* idle tx's */ - int gmni_tx_credits; /* # transmits still possible */ - struct list_head gmni_idle_ltxbs; /* idle large tx buffers */ - struct list_head gmni_buf_txq; /* tx's waiting for buffers */ - struct list_head gmni_cred_txq; /* tx's waiting for credits */ - spinlock_t gmni_tx_lock; /* serialise */ - - struct gm_hash *gmni_rx_hash; /* buffer->rx lookup */ - struct semaphore gmni_rx_mutex; /* serialise blocking on GM */ -} gmnal_ni_t; - -typedef struct { - int *gm_port; - int *gm_ntx; - int *gm_credits; - int *gm_peer_credits; - int *gm_nlarge_tx_bufs; - int *gm_nrx_small; - int *gm_nrx_large; - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *gm_sysctl; /* sysctl interface */ -#endif -} gmnal_tunables_t; - - -/* gmnal_api.c */ -int gmnal_init(void); -void gmnal_fini(void); 
-int gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int gmnal_startup(lnet_ni_t *ni); -void gmnal_shutdown(lnet_ni_t *ni); - -/* gmnal_cb.c */ -int gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); - -/* gmnal_util.c */ -void gmnal_free_ltxbufs(gmnal_ni_t *gmni); -int gmnal_alloc_ltxbufs(gmnal_ni_t *gmni); -void gmnal_free_txs(gmnal_ni_t *gmni); -int gmnal_alloc_txs(gmnal_ni_t *gmni); -void gmnal_free_rxs(gmnal_ni_t *gmni); -int gmnal_alloc_rxs(gmnal_ni_t *gmni); -char *gmnal_gmstatus2str(gm_status_t status); -char *gmnal_rxevent2str(gm_recv_event_t *ev); -void gmnal_yield(int delay); - -/* gmnal_comm.c */ -void gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx); -gmnal_tx_t *gmnal_get_tx(gmnal_ni_t *gmni); -void gmnal_tx_done(gmnal_tx_t *tx, int rc); -void gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg, - lnet_nid_t dstnid, int type); -void gmnal_stop_threads(gmnal_ni_t *gmni); -int gmnal_start_threads(gmnal_ni_t *gmni); -void gmnal_check_txqueues_locked (gmnal_ni_t *gmni); - -/* Module Parameters */ -extern gmnal_tunables_t gmnal_tunables; - -#endif /*__INCLUDE_GMNAL_H__*/ diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c deleted file mode 100644 index e316309e2e3b1930aa815a9590b080ce5618d512..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ /dev/null @@ -1,266 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. 
- * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Implements the API NAL functions - */ - -#include "gmlnd.h" - -lnd_t the_gmlnd = -{ - .lnd_type = GMLND, - .lnd_startup = gmnal_startup, - .lnd_shutdown = gmnal_shutdown, - .lnd_ctl = gmnal_ctl, - .lnd_send = gmnal_send, - .lnd_recv = gmnal_recv, -}; - -gmnal_ni_t *the_gmni = NULL; - -int -gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - - switch (cmd) { - case IOC_LIBCFS_REGISTER_MYNID: - if (data->ioc_nid == ni->ni_nid) - return 0; - - LASSERT (LNET_NIDNET(data->ioc_nid) == LNET_NIDNET(ni->ni_nid)); - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID for %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return 0; - - default: - return (-EINVAL); - } -} - -int -gmnal_set_local_nid (gmnal_ni_t *gmni) -{ - lnet_ni_t *ni = gmni->gmni_ni; - __u32 local_gmid; - __u32 global_gmid; - gm_status_t gm_status; - - /* Called before anything initialised: no need to lock */ - gm_status = gm_get_node_id(gmni->gmni_port, &local_gmid); - if (gm_status != GM_SUCCESS) - return 0; - - CDEBUG(D_NET, "Local node id is [%u]\n", local_gmid); - - gm_status = gm_node_id_to_global_id(gmni->gmni_port, - local_gmid, - &global_gmid); - if (gm_status != GM_SUCCESS) - return 0; - - CDEBUG(D_NET, "Global node id is [%u]\n", 
global_gmid); - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), global_gmid); - return 1; -} - -void -gmnal_shutdown(lnet_ni_t *ni) -{ - gmnal_ni_t *gmni = ni->ni_data; - - CDEBUG(D_TRACE, "gmnal_api_shutdown: gmni [%p]\n", gmni); - - LASSERT (gmni == the_gmni); - - /* stop processing messages */ - gmnal_stop_threads(gmni); - - /* stop all network callbacks */ - gm_close(gmni->gmni_port); - gmni->gmni_port = NULL; - - gm_finalize(); - - gmnal_free_ltxbufs(gmni); - gmnal_free_txs(gmni); - gmnal_free_rxs(gmni); - - LIBCFS_FREE(gmni, sizeof(*gmni)); - - the_gmni = NULL; - PORTAL_MODULE_UNUSE; -} - -int -gmnal_startup(lnet_ni_t *ni) -{ - gmnal_ni_t *gmni = NULL; - gmnal_rx_t *rx = NULL; - gm_status_t gm_status; - int rc; - - LASSERT (ni->ni_lnd == &the_gmlnd); - - ni->ni_maxtxcredits = *gmnal_tunables.gm_credits; - ni->ni_peertxcredits = *gmnal_tunables.gm_peer_credits; - - if (the_gmni != NULL) { - CERROR("Only 1 instance supported\n"); - return -EINVAL; - } - - LIBCFS_ALLOC(gmni, sizeof(*gmni)); - if (gmni == NULL) { - CERROR("can't allocate gmni\n"); - return -ENOMEM; - } - - ni->ni_data = gmni; - - memset(gmni, 0, sizeof(*gmni)); - gmni->gmni_ni = ni; - spin_lock_init(&gmni->gmni_tx_lock); - spin_lock_init(&gmni->gmni_gm_lock); - INIT_LIST_HEAD(&gmni->gmni_idle_txs); - INIT_LIST_HEAD(&gmni->gmni_idle_ltxbs); - INIT_LIST_HEAD(&gmni->gmni_buf_txq); - INIT_LIST_HEAD(&gmni->gmni_cred_txq); - sema_init(&gmni->gmni_rx_mutex, 1); - PORTAL_MODULE_USE; - - /* - * initialise the interface, - */ - CDEBUG(D_NET, "Calling gm_init\n"); - if (gm_init() != GM_SUCCESS) { - CERROR("call to gm_init failed\n"); - goto failed_0; - } - - CDEBUG(D_NET, "Calling gm_open with port [%d], version [%d]\n", - *gmnal_tunables.gm_port, GM_API_VERSION); - - gm_status = gm_open(&gmni->gmni_port, 0, *gmnal_tunables.gm_port, - "gmnal", GM_API_VERSION); - - if (gm_status != GM_SUCCESS) { - CERROR("Can't open GM port %d: %d (%s)\n", - *gmnal_tunables.gm_port, gm_status, - 
gmnal_gmstatus2str(gm_status)); - goto failed_1; - } - - CDEBUG(D_NET,"gm_open succeeded port[%p]\n",gmni->gmni_port); - - if (!gmnal_set_local_nid(gmni)) - goto failed_2; - - CDEBUG(D_NET, "portals_nid is %s\n", libcfs_nid2str(ni->ni_nid)); - - gmni->gmni_large_msgsize = - offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[LNET_MAX_PAYLOAD]); - gmni->gmni_large_gmsize = - gm_min_size_for_length(gmni->gmni_large_msgsize); - gmni->gmni_large_pages = - (gmni->gmni_large_msgsize + PAGE_SIZE - 1)/PAGE_SIZE; - - gmni->gmni_small_msgsize = MIN(GM_MTU, PAGE_SIZE); - gmni->gmni_small_gmsize = - gm_min_size_for_length(gmni->gmni_small_msgsize); - - gmni->gmni_netaddr_base = GMNAL_NETADDR_BASE; - gmni->gmni_netaddr_size = 0; - - CDEBUG(D_NET, "Msg size %08x/%08x [%d/%d]\n", - gmni->gmni_large_msgsize, gmni->gmni_small_msgsize, - gmni->gmni_large_gmsize, gmni->gmni_small_gmsize); - - if (gmnal_alloc_rxs(gmni) != 0) { - CERROR("Failed to allocate rx descriptors\n"); - goto failed_2; - } - - if (gmnal_alloc_txs(gmni) != 0) { - CERROR("Failed to allocate tx descriptors\n"); - goto failed_2; - } - - if (gmnal_alloc_ltxbufs(gmni) != 0) { - CERROR("Failed to allocate large tx buffers\n"); - goto failed_2; - } - - rc = gmnal_start_threads(gmni); - if (rc != 0) { - CERROR("Can't start threads: %d\n", rc); - goto failed_2; - } - - /* Start listening */ - for (rx = gmni->gmni_rxs; rx != NULL; rx = rx->rx_next) - gmnal_post_rx(gmni, rx); - - the_gmni = gmni; - - CDEBUG(D_NET, "gmnal_init finished\n"); - return 0; - - failed_2: - gm_close(gmni->gmni_port); - gmni->gmni_port = NULL; - - failed_1: - gm_finalize(); - - failed_0: - /* safe to free descriptors after network has been shut down */ - gmnal_free_ltxbufs(gmni); - gmnal_free_txs(gmni); - gmnal_free_rxs(gmni); - - LIBCFS_FREE(gmni, sizeof(*gmni)); - PORTAL_MODULE_UNUSE; - - return -EIO; -} - -/* - * Called when module loaded - */ -int gmnal_init(void) -{ - lnet_register_lnd(&the_gmlnd); - return 0; -} - -/* - * Called when module 
removed - */ -void gmnal_fini() -{ - lnet_unregister_lnd(&the_gmlnd); -} diff --git a/lnet/klnds/gmlnd/gmlnd_cb.c b/lnet/klnds/gmlnd/gmlnd_cb.c deleted file mode 100644 index a1a8df9bd48a9916f3fd73d6a856975ef7f580c8..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_cb.c +++ /dev/null @@ -1,162 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -/* - * This file implements the nal cb functions - */ - - -#include "gmlnd.h" - -int -gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - gmnal_ni_t *gmni = ni->ni_data; - gmnal_rx_t *rx = (gmnal_rx_t*)private; - gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf); - int npages = rx->rx_islarge ? 
gmni->gmni_large_pages : 1; - int payload_offset = offsetof(gmnal_msg_t, - gmm_u.immediate.gmim_payload[0]); - int nob = payload_offset + mlen; - - LASSERT (msg->gmm_type == GMNAL_MSG_IMMEDIATE); - LASSERT (iov == NULL || kiov == NULL); - - if (rx->rx_recv_nob < nob) { - CERROR("Short message from nid %s: got %d, need %d\n", - libcfs_nid2str(msg->gmm_srcnid), rx->rx_recv_nob, nob); - gmnal_post_rx(gmni, rx); - return -EIO; - } - - if (kiov != NULL) - lnet_copy_kiov2kiov(niov, kiov, offset, - npages, rx->rx_buf.nb_kiov, payload_offset, - mlen); - else - lnet_copy_kiov2iov(niov, iov, offset, - npages, rx->rx_buf.nb_kiov, payload_offset, - mlen); - - lnet_finalize(ni, lntmsg, 0); - gmnal_post_rx(gmni, rx); - return 0; -} - -int -gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr= &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int len = lntmsg->msg_len; - gmnal_ni_t *gmni = ni->ni_data; - gm_status_t gmrc; - gmnal_tx_t *tx; - - LASSERT (iov == NULL || kiov == NULL); - - /* I may not block for a tx if I'm responding to an incoming message */ - tx = gmnal_get_tx(gmni); - if (tx == NULL) { - if (!gmni->gmni_shutdown) - CERROR ("Can't get tx for msg type %d for %s\n", - type, libcfs_nid2str(target.nid)); - return -EIO; - } - - tx->tx_nid = target.nid; - - gmrc = gm_global_id_to_node_id(gmni->gmni_port, LNET_NIDADDR(target.nid), - &tx->tx_gmlid); - if (gmrc != GM_SUCCESS) { - CERROR("Can't map Nid %s to a GM local ID: %d\n", - libcfs_nid2str(target.nid), gmrc); - /* NB tx_lntmsg not set => doesn't finalize */ - gmnal_tx_done(tx, -EIO); - return -EIO; - } - - gmnal_pack_msg(gmni, GMNAL_NETBUF_MSG(&tx->tx_buf), - target.nid, GMNAL_MSG_IMMEDIATE); - GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_u.immediate.gmim_hdr = *hdr; - 
tx->tx_msgnob = offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0]); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto test */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_magic = - LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - if (tx->tx_msgnob + len <= gmni->gmni_small_msgsize) { - /* whole message fits in tx_buf */ - char *buffer = &(GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_u.immediate.gmim_payload[0]); - - if (iov != NULL) - lnet_copy_iov2flat(len, buffer, 0, - niov, iov, offset, len); - else - lnet_copy_kiov2flat(len, buffer, 0, - niov, kiov, offset, len); - - tx->tx_msgnob += len; - tx->tx_large_nob = 0; - } else { - /* stash payload pts to copy later */ - tx->tx_large_nob = len; - tx->tx_large_iskiov = (kiov != NULL); - tx->tx_large_niov = niov; - if (tx->tx_large_iskiov) - tx->tx_large_frags.kiov = kiov; - else - tx->tx_large_frags.iov = iov; - } - - LASSERT(tx->tx_lntmsg == NULL); - tx->tx_lntmsg = lntmsg; - - spin_lock(&gmni->gmni_tx_lock); - - list_add_tail(&tx->tx_list, &gmni->gmni_buf_txq); - gmnal_check_txqueues_locked(gmni); - - spin_unlock(&gmni->gmni_tx_lock); - - return 0; -} diff --git a/lnet/klnds/gmlnd/gmlnd_comm.c b/lnet/klnds/gmlnd/gmlnd_comm.c deleted file mode 100644 index 5f48cf0057089e24a737e04e18eb9968d927f6cd..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_comm.c +++ /dev/null @@ -1,562 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. 
- * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * This file contains all gmnal send and receive functions - */ - -#include "gmlnd.h" - -void -gmnal_notify_peer_down(gmnal_tx_t *tx) -{ - time_t then; - - then = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - tx->tx_launchtime); - - lnet_notify(tx->tx_gmni->gmni_ni, tx->tx_nid, 0, then); -} - -void -gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg, - lnet_nid_t dstnid, int type) -{ - /* CAVEAT EMPTOR! this only sets the common message fields. */ - msg->gmm_magic = GMNAL_MSG_MAGIC; - msg->gmm_version = GMNAL_MSG_VERSION; - msg->gmm_type = type; - msg->gmm_srcnid = lnet_ptlcompat_srcnid(gmni->gmni_ni->ni_nid, - dstnid); - msg->gmm_dstnid = dstnid; -} - -int -gmnal_unpack_msg(gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf); - const int hdr_size = offsetof(gmnal_msg_t, gmm_u); - int buffnob = rx->rx_islarge ? 
gmni->gmni_large_msgsize : - gmni->gmni_small_msgsize; - int flip; - - /* rc = 0:SUCCESS -ve:failure +ve:version mismatch */ - - /* GM may not overflow our buffer */ - LASSERT (rx->rx_recv_nob <= buffnob); - - /* 6 bytes are enough to have received magic + version */ - if (rx->rx_recv_nob < 6) { - CERROR("Short message from gmid %u: %d\n", - rx->rx_recv_gmid, rx->rx_recv_nob); - return -EPROTO; - } - - if (msg->gmm_magic == GMNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->gmm_magic == __swab32(GMNAL_MSG_MAGIC)) { - flip = 1; - } else if (msg->gmm_magic == LNET_PROTO_MAGIC || - msg->gmm_magic == __swab32(LNET_PROTO_MAGIC)) { - return EPROTO; - } else { - CERROR("Bad magic from gmid %u: %08x\n", - rx->rx_recv_gmid, msg->gmm_magic); - return -EPROTO; - } - - if (msg->gmm_version != - (flip ? __swab16(GMNAL_MSG_VERSION) : GMNAL_MSG_VERSION)) { - return EPROTO; - } - - if (rx->rx_recv_nob < hdr_size) { - CERROR("Short message from %u: %d\n", - rx->rx_recv_gmid, rx->rx_recv_nob); - return -EPROTO; - } - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - __swab16s(&msg->gmm_version); - __swab16s(&msg->gmm_type); - __swab64s(&msg->gmm_srcnid); - __swab64s(&msg->gmm_dstnid); - } - - if (msg->gmm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid from %u: %s\n", - rx->rx_recv_gmid, libcfs_nid2str(msg->gmm_srcnid)); - return -EPROTO; - } - - if (!lnet_ptlcompat_matchnid(gmni->gmni_ni->ni_nid, - msg->gmm_dstnid)) { - CERROR("Bad dst nid from %u: %s\n", - rx->rx_recv_gmid, libcfs_nid2str(msg->gmm_dstnid)); - return -EPROTO; - } - - switch (msg->gmm_type) { - default: - CERROR("Unknown message type from %u: %x\n", - rx->rx_recv_gmid, msg->gmm_type); - return -EPROTO; - - case GMNAL_MSG_IMMEDIATE: - if (rx->rx_recv_nob < offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0])) { - CERROR("Short IMMEDIATE from %u: %d("LPSZ")\n", - rx->rx_recv_gmid, rx->rx_recv_nob, - offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0])); - return -EPROTO; - } - break; - } - 
return 0; -} - -gmnal_tx_t * -gmnal_get_tx(gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx = NULL; - - spin_lock(&gmni->gmni_tx_lock); - - if (gmni->gmni_shutdown || - list_empty(&gmni->gmni_idle_txs)) { - spin_unlock(&gmni->gmni_tx_lock); - return NULL; - } - - tx = list_entry(gmni->gmni_idle_txs.next, gmnal_tx_t, tx_list); - list_del(&tx->tx_list); - - spin_unlock(&gmni->gmni_tx_lock); - - LASSERT (tx->tx_lntmsg == NULL); - LASSERT (tx->tx_ltxb == NULL); - LASSERT (!tx->tx_credit); - - return tx; -} - -void -gmnal_tx_done(gmnal_tx_t *tx, int rc) -{ - gmnal_ni_t *gmni = tx->tx_gmni; - int wake_sched = 0; - lnet_msg_t *lnetmsg = tx->tx_lntmsg; - - tx->tx_lntmsg = NULL; - - spin_lock(&gmni->gmni_tx_lock); - - if (tx->tx_ltxb != NULL) { - wake_sched = 1; - list_add_tail(&tx->tx_ltxb->txb_list, &gmni->gmni_idle_ltxbs); - tx->tx_ltxb = NULL; - } - - if (tx->tx_credit) { - wake_sched = 1; - gmni->gmni_tx_credits++; - tx->tx_credit = 0; - } - - list_add_tail(&tx->tx_list, &gmni->gmni_idle_txs); - - if (wake_sched) - gmnal_check_txqueues_locked(gmni); - - spin_unlock(&gmni->gmni_tx_lock); - - /* Delay finalize until tx is free */ - if (lnetmsg != NULL) - lnet_finalize(gmni->gmni_ni, lnetmsg, rc); -} - -void -gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, - gm_status_t status) -{ - gmnal_tx_t *tx = (gmnal_tx_t*)context; - - LASSERT(!in_interrupt()); - - CDEBUG(D_NET, "status for tx [%p] is [%d][%s], nid %s\n", - tx, status, gmnal_gmstatus2str(status), - libcfs_nid2str(tx->tx_nid)); - - gmnal_tx_done(tx, -EIO); -} - -void -gmnal_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) -{ - gmnal_tx_t *tx = (gmnal_tx_t*)context; - gmnal_ni_t *gmni = tx->tx_gmni; - - LASSERT(!in_interrupt()); - - switch(status) { - case GM_SUCCESS: - gmnal_tx_done(tx, 0); - return; - - case GM_SEND_DROPPED: - CDEBUG(D_NETERROR, "Dropped tx %p to %s\n", - tx, libcfs_nid2str(tx->tx_nid)); - /* Another tx failed and called gm_drop_sends() which made this - * one complete 
immediately */ - gmnal_tx_done(tx, -EIO); - return; - - default: - /* Some error; NB don't complete tx yet; we need its credit for - * gm_drop_sends() */ - CDEBUG(D_NETERROR, "tx %p error %d(%s), nid %s\n", - tx, status, gmnal_gmstatus2str(status), - libcfs_nid2str(tx->tx_nid)); - - gmnal_notify_peer_down(tx); - - spin_lock(&gmni->gmni_gm_lock); - gm_drop_sends(gmni->gmni_port, - tx->tx_ltxb != NULL ? - GMNAL_LARGE_PRIORITY : GMNAL_SMALL_PRIORITY, - tx->tx_gmlid, *gmnal_tunables.gm_port, - gmnal_drop_sends_callback, tx); - spin_unlock(&gmni->gmni_gm_lock); - return; - } - - /* not reached */ - LBUG(); -} - -void -gmnal_check_txqueues_locked (gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx; - gmnal_txbuf_t *ltxb; - int gmsize; - int pri; - void *netaddr; - - tx = list_empty(&gmni->gmni_buf_txq) ? NULL : - list_entry(gmni->gmni_buf_txq.next, gmnal_tx_t, tx_list); - - if (tx != NULL && - (tx->tx_large_nob == 0 || - !list_empty(&gmni->gmni_idle_ltxbs))) { - - /* consume tx */ - list_del(&tx->tx_list); - - LASSERT (tx->tx_ltxb == NULL); - - if (tx->tx_large_nob != 0) { - ltxb = list_entry(gmni->gmni_idle_ltxbs.next, - gmnal_txbuf_t, txb_list); - - /* consume large buffer */ - list_del(&ltxb->txb_list); - - spin_unlock(&gmni->gmni_tx_lock); - - /* Unlocking here allows sends to get re-ordered, - * but we want to allow other CPUs to progress... */ - - tx->tx_ltxb = ltxb; - - /* marshall message in tx_ltxb... - * 1. Copy what was marshalled so far (in tx_buf) */ - memcpy(GMNAL_NETBUF_MSG(&ltxb->txb_buf), - GMNAL_NETBUF_MSG(&tx->tx_buf), tx->tx_msgnob); - - /* 2.
Copy the payload */ - if (tx->tx_large_iskiov) - lnet_copy_kiov2kiov( - gmni->gmni_large_pages, - ltxb->txb_buf.nb_kiov, - tx->tx_msgnob, - tx->tx_large_niov, - tx->tx_large_frags.kiov, - tx->tx_large_offset, - tx->tx_large_nob); - else - lnet_copy_iov2kiov( - gmni->gmni_large_pages, - ltxb->txb_buf.nb_kiov, - tx->tx_msgnob, - tx->tx_large_niov, - tx->tx_large_frags.iov, - tx->tx_large_offset, - tx->tx_large_nob); - - tx->tx_msgnob += tx->tx_large_nob; - - spin_lock(&gmni->gmni_tx_lock); - } - - list_add_tail(&tx->tx_list, &gmni->gmni_cred_txq); - } - - if (!list_empty(&gmni->gmni_cred_txq) && - gmni->gmni_tx_credits != 0) { - - tx = list_entry(gmni->gmni_cred_txq.next, gmnal_tx_t, tx_list); - - /* consume tx and 1 credit */ - list_del(&tx->tx_list); - gmni->gmni_tx_credits--; - - spin_unlock(&gmni->gmni_tx_lock); - - /* Unlocking here allows sends to get re-ordered, but we want - * to allow other CPUs to progress... */ - - LASSERT(!tx->tx_credit); - tx->tx_credit = 1; - - tx->tx_launchtime = cfs_time_current(); - - if (tx->tx_msgnob <= gmni->gmni_small_msgsize) { - LASSERT (tx->tx_ltxb == NULL); - netaddr = GMNAL_NETBUF_LOCAL_NETADDR(&tx->tx_buf); - gmsize = gmni->gmni_small_gmsize; - pri = GMNAL_SMALL_PRIORITY; - } else { - LASSERT (tx->tx_ltxb != NULL); - netaddr = GMNAL_NETBUF_LOCAL_NETADDR(&tx->tx_ltxb->txb_buf); - gmsize = gmni->gmni_large_gmsize; - pri = GMNAL_LARGE_PRIORITY; - } - - spin_lock(&gmni->gmni_gm_lock); - - gm_send_to_peer_with_callback(gmni->gmni_port, - netaddr, gmsize, - tx->tx_msgnob, - pri, - tx->tx_gmlid, - gmnal_tx_callback, - (void*)tx); - - spin_unlock(&gmni->gmni_gm_lock); - spin_lock(&gmni->gmni_tx_lock); - } -} - -void -gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - int gmsize = rx->rx_islarge ? gmni->gmni_large_gmsize : - gmni->gmni_small_gmsize; - int pri = rx->rx_islarge ? 
GMNAL_LARGE_PRIORITY : - GMNAL_SMALL_PRIORITY; - void *buffer = GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf); - - CDEBUG(D_NET, "posting rx %p buf %p\n", rx, buffer); - - spin_lock(&gmni->gmni_gm_lock); - gm_provide_receive_buffer_with_tag(gmni->gmni_port, - buffer, gmsize, pri, 0); - spin_unlock(&gmni->gmni_gm_lock); -} - -void -gmnal_version_reply (gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - /* Future protocol version compatibility support! - * The next gmlnd-specific protocol rev will first send a message to - * check version; I reply with a stub message containing my current - * magic+version... */ - gmnal_msg_t *msg; - gmnal_tx_t *tx = gmnal_get_tx(gmni); - - if (tx == NULL) { - CERROR("Can't allocate tx to send version info to %u\n", - rx->rx_recv_gmid); - return; - } - - LASSERT (tx->tx_lntmsg == NULL); /* no finalize */ - - tx->tx_nid = LNET_NID_ANY; - tx->tx_gmlid = rx->rx_recv_gmid; - - msg = GMNAL_NETBUF_MSG(&tx->tx_buf); - msg->gmm_magic = GMNAL_MSG_MAGIC; - msg->gmm_version = GMNAL_MSG_VERSION; - - /* just send magic + version */ - tx->tx_msgnob = offsetof(gmnal_msg_t, gmm_type); - tx->tx_large_nob = 0; - - spin_lock(&gmni->gmni_tx_lock); - - list_add_tail(&tx->tx_list, &gmni->gmni_buf_txq); - gmnal_check_txqueues_locked(gmni); - - spin_unlock(&gmni->gmni_tx_lock); -} - -int -gmnal_rx_thread(void *arg) -{ - gmnal_ni_t *gmni = arg; - gm_recv_event_t *rxevent = NULL; - gm_recv_t *recv = NULL; - gmnal_rx_t *rx; - int rc; - - cfs_daemonize("gmnal_rxd"); - - while (!gmni->gmni_shutdown) { - rc = down_interruptible(&gmni->gmni_rx_mutex); - LASSERT (rc == 0 || rc == -EINTR); - if (rc != 0) - continue; - - spin_lock(&gmni->gmni_gm_lock); - rxevent = gm_blocking_receive_no_spin(gmni->gmni_port); - spin_unlock(&gmni->gmni_gm_lock); - - switch (GM_RECV_EVENT_TYPE(rxevent)) { - default: - gm_unknown(gmni->gmni_port, rxevent); - up(&gmni->gmni_rx_mutex); - continue; - - case GM_FAST_RECV_EVENT: - case GM_FAST_PEER_RECV_EVENT: - case GM_PEER_RECV_EVENT: - case 
GM_FAST_HIGH_RECV_EVENT: - case GM_FAST_HIGH_PEER_RECV_EVENT: - case GM_HIGH_PEER_RECV_EVENT: - case GM_RECV_EVENT: - case GM_HIGH_RECV_EVENT: - break; - } - - recv = &rxevent->recv; - rx = gm_hash_find(gmni->gmni_rx_hash, - gm_ntohp(recv->buffer)); - LASSERT (rx != NULL); - - rx->rx_recv_nob = gm_ntoh_u32(recv->length); - rx->rx_recv_gmid = gm_ntoh_u16(recv->sender_node_id); - rx->rx_recv_port = gm_ntoh_u8(recv->sender_port_id); - rx->rx_recv_type = gm_ntoh_u8(recv->type); - - switch (GM_RECV_EVENT_TYPE(rxevent)) { - case GM_FAST_RECV_EVENT: - case GM_FAST_PEER_RECV_EVENT: - case GM_FAST_HIGH_RECV_EVENT: - case GM_FAST_HIGH_PEER_RECV_EVENT: - LASSERT (rx->rx_recv_nob <= PAGE_SIZE); - - memcpy(GMNAL_NETBUF_MSG(&rx->rx_buf), - gm_ntohp(recv->message), rx->rx_recv_nob); - break; - } - - up(&gmni->gmni_rx_mutex); - - CDEBUG (D_NET, "rx %p: buf %p(%p) nob %d\n", rx, - GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf), - gm_ntohp(recv->buffer), rx->rx_recv_nob); - - /* We're connectionless: simply drop packets with - * errors */ - rc = gmnal_unpack_msg(gmni, rx); - - if (rc == 0) { - gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf); - - LASSERT (msg->gmm_type == GMNAL_MSG_IMMEDIATE); - rc = lnet_parse(gmni->gmni_ni, - &msg->gmm_u.immediate.gmim_hdr, - msg->gmm_srcnid, rx, 0); - } else if (rc > 0) { - gmnal_version_reply(gmni, rx); - rc = -EPROTO; /* repost rx */ - } - - if (rc < 0) /* parse failure */ - gmnal_post_rx(gmni, rx); - } - - CDEBUG(D_NET, "exiting\n"); - atomic_dec(&gmni->gmni_nthreads); - return 0; -} - -void -gmnal_stop_threads(gmnal_ni_t *gmni) -{ - int count = 2; - - gmni->gmni_shutdown = 1; - mb(); - - /* wake rxthread owning gmni_rx_mutex with an alarm. 
*/ - spin_lock(&gmni->gmni_gm_lock); - gm_set_alarm(gmni->gmni_port, &gmni->gmni_alarm, 0, NULL, NULL); - spin_unlock(&gmni->gmni_gm_lock); - - while (atomic_read(&gmni->gmni_nthreads) != 0) { - count++; - if ((count & (count - 1)) == 0) - CWARN("Waiting for %d threads to stop\n", - atomic_read(&gmni->gmni_nthreads)); - gmnal_yield(1); - } -} - -int -gmnal_start_threads(gmnal_ni_t *gmni) -{ - int i; - int pid; - - LASSERT (!gmni->gmni_shutdown); - LASSERT (atomic_read(&gmni->gmni_nthreads) == 0); - - gm_initialize_alarm(&gmni->gmni_alarm); - - for (i = 0; i < num_online_cpus(); i++) { - - pid = kernel_thread(gmnal_rx_thread, (void*)gmni, 0); - if (pid < 0) { - CERROR("rx thread failed to start: %d\n", pid); - gmnal_stop_threads(gmni); - return pid; - } - - atomic_inc(&gmni->gmni_nthreads); - } - - return 0; -} diff --git a/lnet/klnds/gmlnd/gmlnd_module.c b/lnet/klnds/gmlnd/gmlnd_module.c deleted file mode 100644 index f4d180a907392d7152a77cdb13dabcd4c932c87c..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_module.c +++ /dev/null @@ -1,180 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "gmlnd.h" - - -static int port = 4; -CFS_MODULE_PARM(port, "i", int, 0444, - "GM port to use for communications"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# tx descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends per peer"); - -static int nlarge_tx_bufs = 32; -CFS_MODULE_PARM(nlarge_tx_bufs, "i", int, 0444, - "# large tx message buffers"); - -static int nrx_small = 128; -CFS_MODULE_PARM(nrx_small, "i", int, 0444, - "# small rx message buffers"); - -static int nrx_large = 64; -CFS_MODULE_PARM(nrx_large, "i", int, 0444, - "# large rx message buffers"); - -gmnal_tunables_t gmnal_tunables = { - .gm_port = &port, - .gm_ntx = &ntx, - .gm_credits = &credits, - .gm_peer_credits = &peer_credits, - .gm_nlarge_tx_bufs = &nlarge_tx_bufs, - .gm_nrx_small = &nrx_small, - .gm_nrx_large = &nrx_large, -}; - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM -static cfs_sysctl_table_t gmnal_ctl_table[] = { - { - .ctl_name = 1, - .procname = "port", - .data = &port, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 2, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 3, - .procname = "credits", - .data = &credits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "peer_credits", - .data = &peer_credits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 5, - .procname = "nlarge_tx_bufs", - .data = &nlarge_tx_bufs, - .maxlen = sizeof (int), - .mode = 
0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "nrx_small", - .data = &nrx_small, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "nrx_large", - .data = &nrx_large, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - {0} -}; - -static cfs_sysctl_table_t gmnal_top_ctl_table[] = { - { - .ctl_name = 207, - .procname = "gmnal", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = gmnal_ctl_table - }, - {0} -}; -#endif - -static int __init -gmnal_load(void) -{ - int status; - CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n"); - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - gmnal_tunables.gm_sysctl = - cfs_register_sysctl_table(gmnal_top_ctl_table, 0); - - if (gmnal_tunables.gm_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); -#endif - CDEBUG(D_NET, "Calling gmnal_init\n"); - status = gmnal_init(); - if (status == 0) { - CDEBUG(D_NET, "Portals GMNAL initialised ok\n"); - } else { - CDEBUG(D_NET, "Portals GMNAL Failed to initialise\n"); - return(-ENODEV); - } - - CDEBUG(D_NET, "This is the end of the gmnal init routine"); - - return(0); -} - -static void __exit -gmnal_unload(void) -{ - gmnal_fini(); -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - if (gmnal_tunables.gm_sysctl != NULL) - cfs_unregister_sysctl_table(gmnal_tunables.gm_sysctl); -#endif -} - -module_init(gmnal_load); -module_exit(gmnal_unload); - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel GM LND v1.01"); -MODULE_LICENSE("GPL"); diff --git a/lnet/klnds/gmlnd/gmlnd_utils.c b/lnet/klnds/gmlnd/gmlnd_utils.c deleted file mode 100644 index a39316550e065e8a18d82b1d972cccf58d01b68e..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_utils.c +++ /dev/null @@ -1,579 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "gmlnd.h" - -void -gmnal_free_netbuf_pages (gmnal_netbuf_t *nb, int npages) -{ - int i; - - for (i = 0; i < npages; i++) - __free_page(nb->nb_kiov[i].kiov_page); -} - -int -gmnal_alloc_netbuf_pages (gmnal_ni_t *gmni, gmnal_netbuf_t *nb, int npages) -{ - int i; - gm_status_t gmrc; - - LASSERT (npages > 0); - - for (i = 0; i < npages; i++) { - nb->nb_kiov[i].kiov_page = alloc_page(GFP_KERNEL); - nb->nb_kiov[i].kiov_offset = 0; - nb->nb_kiov[i].kiov_len = PAGE_SIZE; - - if (nb->nb_kiov[i].kiov_page == NULL) { - CERROR("Can't allocate page\n"); - gmnal_free_netbuf_pages(nb, i); - return -ENOMEM; - } - - CDEBUG(D_NET,"[%3d] page %p, phys "LPX64", @ "LPX64"\n", - i, nb->nb_kiov[i].kiov_page, - lnet_page2phys(nb->nb_kiov[i].kiov_page), - gmni->gmni_netaddr_base); - - gmrc = gm_register_memory_ex_phys( - gmni->gmni_port, - lnet_page2phys(nb->nb_kiov[i].kiov_page), - PAGE_SIZE, - gmni->gmni_netaddr_base); - CDEBUG(D_NET,"[%3d] page %p: %d\n", - i, nb->nb_kiov[i].kiov_page, gmrc); - - if (gmrc != GM_SUCCESS) { - CERROR("Can't map page: %d(%s)\n", gmrc, - gmnal_gmstatus2str(gmrc)); - gmnal_free_netbuf_pages(nb, i+1); - return -ENOMEM; - } - - if (i == 0) - nb->nb_netaddr = gmni->gmni_netaddr_base; - - gmni->gmni_netaddr_base += PAGE_SIZE; - } - - return 0; -} - -void -gmnal_free_ltxbuf (gmnal_ni_t *gmni, gmnal_txbuf_t *txb) -{ - int npages = gmni->gmni_large_pages; - - LASSERT (gmni->gmni_port == NULL); - /* No unmapping; the port has been closed */ - - gmnal_free_netbuf_pages(&txb->txb_buf, gmni->gmni_large_pages); - LIBCFS_FREE(txb, offsetof(gmnal_txbuf_t, txb_buf.nb_kiov[npages])); -} - -int -gmnal_alloc_ltxbuf (gmnal_ni_t *gmni) -{ - int npages = gmni->gmni_large_pages; - int sz = offsetof(gmnal_txbuf_t, txb_buf.nb_kiov[npages]); - gmnal_txbuf_t *txb; - int rc; - - LIBCFS_ALLOC(txb, sz); - if (txb == NULL) { - CERROR("Can't allocate large txbuffer\n"); - return -ENOMEM; - } - - rc = gmnal_alloc_netbuf_pages(gmni, &txb->txb_buf, npages); - if (rc != 0) { - 
LIBCFS_FREE(txb, sz); - return rc; - } - - list_add_tail(&txb->txb_list, &gmni->gmni_idle_ltxbs); - - txb->txb_next = gmni->gmni_ltxbs; - gmni->gmni_ltxbs = txb; - - return 0; -} - -void -gmnal_free_tx (gmnal_tx_t *tx) -{ - LASSERT (tx->tx_gmni->gmni_port == NULL); - - gmnal_free_netbuf_pages(&tx->tx_buf, 1); - LIBCFS_FREE(tx, sizeof(*tx)); -} - -int -gmnal_alloc_tx (gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx; - int rc; - - LIBCFS_ALLOC(tx, sizeof(*tx)); - if (tx == NULL) { - CERROR("Failed to allocate tx\n"); - return -ENOMEM; - } - - memset(tx, 0, sizeof(*tx)); - - rc = gmnal_alloc_netbuf_pages(gmni, &tx->tx_buf, 1); - if (rc != 0) { - LIBCFS_FREE(tx, sizeof(*tx)); - return -ENOMEM; - } - - tx->tx_gmni = gmni; - - list_add_tail(&tx->tx_list, &gmni->gmni_idle_txs); - - tx->tx_next = gmni->gmni_txs; - gmni->gmni_txs = tx; - - return 0; -} - -void -gmnal_free_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - int npages = rx->rx_islarge ? gmni->gmni_large_pages : 1; - - LASSERT (gmni->gmni_port == NULL); - - gmnal_free_netbuf_pages(&rx->rx_buf, npages); - LIBCFS_FREE(rx, offsetof(gmnal_rx_t, rx_buf.nb_kiov[npages])); -} - -int -gmnal_alloc_rx (gmnal_ni_t *gmni, int islarge) -{ - int npages = islarge ? 
gmni->gmni_large_pages : 1; - int sz = offsetof(gmnal_rx_t, rx_buf.nb_kiov[npages]); - int rc; - gmnal_rx_t *rx; - gm_status_t gmrc; - - LIBCFS_ALLOC(rx, sz); - if (rx == NULL) { - CERROR("Failed to allocate rx\n"); - return -ENOMEM; - } - - memset(rx, 0, sizeof(*rx)); - - rc = gmnal_alloc_netbuf_pages(gmni, &rx->rx_buf, npages); - if (rc != 0) { - LIBCFS_FREE(rx, sz); - return rc; - } - - rx->rx_islarge = islarge; - rx->rx_next = gmni->gmni_rxs; - gmni->gmni_rxs = rx; - - gmrc = gm_hash_insert(gmni->gmni_rx_hash, - GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf), rx); - if (gmrc != GM_SUCCESS) { - CERROR("Couldn't add rx to hash table: %d\n", gmrc); - return -ENOMEM; - } - - return 0; -} - -void -gmnal_free_ltxbufs (gmnal_ni_t *gmni) -{ - gmnal_txbuf_t *txb; - - while ((txb = gmni->gmni_ltxbs) != NULL) { - gmni->gmni_ltxbs = txb->txb_next; - gmnal_free_ltxbuf(gmni, txb); - } -} - -int -gmnal_alloc_ltxbufs (gmnal_ni_t *gmni) -{ - int nlarge_tx_bufs = *gmnal_tunables.gm_nlarge_tx_bufs; - int i; - int rc; - - for (i = 0; i < nlarge_tx_bufs; i++) { - rc = gmnal_alloc_ltxbuf(gmni); - - if (rc != 0) - return rc; - } - - return 0; -} - -void -gmnal_free_txs(gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx; - - while ((tx = gmni->gmni_txs) != NULL) { - gmni->gmni_txs = tx->tx_next; - gmnal_free_tx (tx); - } -} - -int -gmnal_alloc_txs(gmnal_ni_t *gmni) -{ - int ntxcred = gm_num_send_tokens(gmni->gmni_port); - int ntx = *gmnal_tunables.gm_ntx; - int i; - int rc; - - CDEBUG(D_NET, "ntxcred: %d\n", ntxcred); - gmni->gmni_tx_credits = ntxcred; - - for (i = 0; i < ntx; i++) { - rc = gmnal_alloc_tx(gmni); - if (rc != 0) - return rc; - } - - return 0; -} - -void -gmnal_free_rxs(gmnal_ni_t *gmni) -{ - gmnal_rx_t *rx; - - while ((rx = gmni->gmni_rxs) != NULL) { - gmni->gmni_rxs = rx->rx_next; - - gmnal_free_rx(gmni, rx); - } - - LASSERT (gmni->gmni_port == NULL); -#if 0 - /* GM releases all resources allocated to a port when it closes */ - if (gmni->gmni_rx_hash != NULL) - 
gm_destroy_hash(gmni->gmni_rx_hash); -#endif -} - -int -gmnal_alloc_rxs (gmnal_ni_t *gmni) -{ - int nrxcred = gm_num_receive_tokens(gmni->gmni_port); - int nrx_small = *gmnal_tunables.gm_nrx_small; - int nrx_large = *gmnal_tunables.gm_nrx_large; - int nrx = nrx_large + nrx_small; - int rc; - int i; - - CDEBUG(D_NET, "nrxcred: %d(%dL+%dS)\n", nrxcred, nrx_large, nrx_small); - - if (nrx > nrxcred) { - int nlarge = (nrx_large * nrxcred)/nrx; - int nsmall = nrxcred - nlarge; - - CWARN("Only %d rx credits: " - "reducing large %d->%d, small %d->%d\n", nrxcred, - nrx_large, nlarge, nrx_small, nsmall); - - *gmnal_tunables.gm_nrx_large = nrx_large = nlarge; - *gmnal_tunables.gm_nrx_small = nrx_small = nsmall; - nrx = nlarge + nsmall; - } - - gmni->gmni_rx_hash = gm_create_hash(gm_hash_compare_ptrs, - gm_hash_hash_ptr, 0, 0, nrx, 0); - if (gmni->gmni_rx_hash == NULL) { - CERROR("Failed to create hash table\n"); - return -ENOMEM; - } - - for (i = 0; i < nrx; i++ ) { - rc = gmnal_alloc_rx(gmni, i < nrx_large); - if (rc != 0) - return rc; - } - - return 0; -} - -char * -gmnal_gmstatus2str(gm_status_t status) -{ - return(gm_strerror(status)); - - switch(status) { - case(GM_SUCCESS): - return("SUCCESS"); - case(GM_FAILURE): - return("FAILURE"); - case(GM_INPUT_BUFFER_TOO_SMALL): - return("INPUT_BUFFER_TOO_SMALL"); - case(GM_OUTPUT_BUFFER_TOO_SMALL): - return("OUTPUT_BUFFER_TOO_SMALL"); - case(GM_TRY_AGAIN ): - return("TRY_AGAIN"); - case(GM_BUSY): - return("BUSY"); - case(GM_MEMORY_FAULT): - return("MEMORY_FAULT"); - case(GM_INTERRUPTED): - return("INTERRUPTED"); - case(GM_INVALID_PARAMETER): - return("INVALID_PARAMETER"); - case(GM_OUT_OF_MEMORY): - return("OUT_OF_MEMORY"); - case(GM_INVALID_COMMAND): - return("INVALID_COMMAND"); - case(GM_PERMISSION_DENIED): - return("PERMISSION_DENIED"); - case(GM_INTERNAL_ERROR): - return("INTERNAL_ERROR"); - case(GM_UNATTACHED): - return("UNATTACHED"); - case(GM_UNSUPPORTED_DEVICE): - return("UNSUPPORTED_DEVICE"); - case(GM_SEND_TIMED_OUT): 
- return("GM_SEND_TIMEDOUT"); - case(GM_SEND_REJECTED): - return("GM_SEND_REJECTED"); - case(GM_SEND_TARGET_PORT_CLOSED): - return("GM_SEND_TARGET_PORT_CLOSED"); - case(GM_SEND_TARGET_NODE_UNREACHABLE): - return("GM_SEND_TARGET_NODE_UNREACHABLE"); - case(GM_SEND_DROPPED): - return("GM_SEND_DROPPED"); - case(GM_SEND_PORT_CLOSED): - return("GM_SEND_PORT_CLOSED"); - case(GM_NODE_ID_NOT_YET_SET): - return("GM_NODE_ID_NOT_YET_SET"); - case(GM_STILL_SHUTTING_DOWN): - return("GM_STILL_SHUTTING_DOWN"); - case(GM_CLONE_BUSY): - return("GM_CLONE_BUSY"); - case(GM_NO_SUCH_DEVICE): - return("GM_NO_SUCH_DEVICE"); - case(GM_ABORTED): - return("GM_ABORTED"); - case(GM_INCOMPATIBLE_LIB_AND_DRIVER): - return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); - case(GM_UNTRANSLATED_SYSTEM_ERROR): - return("GM_UNTRANSLATED_SYSTEM_ERROR"); - case(GM_ACCESS_DENIED): - return("GM_ACCESS_DENIED"); - - - /* - * These ones are in the docs but aren't in the header file - case(GM_DEV_NOT_FOUND): - return("GM_DEV_NOT_FOUND"); - case(GM_INVALID_PORT_NUMBER): - return("GM_INVALID_PORT_NUMBER"); - case(GM_UC_ERROR): - return("GM_US_ERROR"); - case(GM_PAGE_TABLE_FULL): - return("GM_PAGE_TABLE_FULL"); - case(GM_MINOR_OVERFLOW): - return("GM_MINOR_OVERFLOW"); - case(GM_SEND_ORPHANED): - return("GM_SEND_ORPHANED"); - case(GM_HARDWARE_FAULT): - return("GM_HARDWARE_FAULT"); - case(GM_DATA_CORRUPTED): - return("GM_DATA_CORRUPTED"); - case(GM_TIMED_OUT): - return("GM_TIMED_OUT"); - case(GM_USER_ERROR): - return("GM_USER_ERROR"); - case(GM_NO_MATCH): - return("GM_NOMATCH"); - case(GM_NOT_SUPPORTED_IN_KERNEL): - return("GM_NOT_SUPPORTED_IN_KERNEL"); - case(GM_NOT_SUPPORTED_ON_ARCH): - return("GM_NOT_SUPPORTED_ON_ARCH"); - case(GM_PTE_REF_CNT_OVERFLOW): - return("GM_PTR_REF_CNT_OVERFLOW"); - case(GM_NO_DRIVER_SUPPORT): - return("GM_NO_DRIVER_SUPPORT"); - case(GM_FIRMWARE_NOT_RUNNING): - return("GM_FIRMWARE_NOT_RUNNING"); - * These ones are in the docs but aren't in the header file - */ - - default: - return("UNKNOWN GM 
ERROR CODE"); - } -} - - -char * -gmnal_rxevent2str(gm_recv_event_t *ev) -{ - short event; - event = GM_RECV_EVENT_TYPE(ev); - switch(event) { - case(GM_NO_RECV_EVENT): - return("GM_NO_RECV_EVENT"); - case(GM_SENDS_FAILED_EVENT): - return("GM_SEND_FAILED_EVENT"); - case(GM_ALARM_EVENT): - return("GM_ALARM_EVENT"); - case(GM_SENT_EVENT): - return("GM_SENT_EVENT"); - case(_GM_SLEEP_EVENT): - return("_GM_SLEEP_EVENT"); - case(GM_RAW_RECV_EVENT): - return("GM_RAW_RECV_EVENT"); - case(GM_BAD_SEND_DETECTED_EVENT): - return("GM_BAD_SEND_DETECTED_EVENT"); - case(GM_SEND_TOKEN_VIOLATION_EVENT): - return("GM_SEND_TOKEN_VIOLATION_EVENT"); - case(GM_RECV_TOKEN_VIOLATION_EVENT): - return("GM_RECV_TOKEN_VIOLATION_EVENT"); - case(GM_BAD_RECV_TOKEN_EVENT): - return("GM_BAD_RECV_TOKEN_EVENT"); - case(GM_ALARM_VIOLATION_EVENT): - return("GM_ALARM_VIOLATION_EVENT"); - case(GM_RECV_EVENT): - return("GM_RECV_EVENT"); - case(GM_HIGH_RECV_EVENT): - return("GM_HIGH_RECV_EVENT"); - case(GM_PEER_RECV_EVENT): - return("GM_PEER_RECV_EVENT"); - case(GM_HIGH_PEER_RECV_EVENT): - return("GM_HIGH_PEER_RECV_EVENT"); - case(GM_FAST_RECV_EVENT): - return("GM_FAST_RECV_EVENT"); - case(GM_FAST_HIGH_RECV_EVENT): - return("GM_FAST_HIGH_RECV_EVENT"); - case(GM_FAST_PEER_RECV_EVENT): - return("GM_FAST_PEER_RECV_EVENT"); - case(GM_FAST_HIGH_PEER_RECV_EVENT): - return("GM_FAST_HIGH_PEER_RECV_EVENT"); - case(GM_REJECTED_SEND_EVENT): - return("GM_REJECTED_SEND_EVENT"); - case(GM_ORPHANED_SEND_EVENT): - return("GM_ORPHANED_SEND_EVENT"); - case(GM_BAD_RESEND_DETECTED_EVENT): - return("GM_BAD_RESEND_DETETED_EVENT"); - case(GM_DROPPED_SEND_EVENT): - return("GM_DROPPED_SEND_EVENT"); - case(GM_BAD_SEND_VMA_EVENT): - return("GM_BAD_SEND_VMA_EVENT"); - case(GM_BAD_RECV_VMA_EVENT): - return("GM_BAD_RECV_VMA_EVENT"); - case(_GM_FLUSHED_ALARM_EVENT): - return("GM_FLUSHED_ALARM_EVENT"); - case(GM_SENT_TOKENS_EVENT): - return("GM_SENT_TOKENS_EVENTS"); - case(GM_IGNORE_RECV_EVENT): - return("GM_IGNORE_RECV_EVENT"); - 
case(GM_ETHERNET_RECV_EVENT): - return("GM_ETHERNET_RECV_EVENT"); - case(GM_NEW_NO_RECV_EVENT): - return("GM_NEW_NO_RECV_EVENT"); - case(GM_NEW_SENDS_FAILED_EVENT): - return("GM_NEW_SENDS_FAILED_EVENT"); - case(GM_NEW_ALARM_EVENT): - return("GM_NEW_ALARM_EVENT"); - case(GM_NEW_SENT_EVENT): - return("GM_NEW_SENT_EVENT"); - case(_GM_NEW_SLEEP_EVENT): - return("GM_NEW_SLEEP_EVENT"); - case(GM_NEW_RAW_RECV_EVENT): - return("GM_NEW_RAW_RECV_EVENT"); - case(GM_NEW_BAD_SEND_DETECTED_EVENT): - return("GM_NEW_BAD_SEND_DETECTED_EVENT"); - case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): - return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); - case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): - return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); - case(GM_NEW_BAD_RECV_TOKEN_EVENT): - return("GM_NEW_BAD_RECV_TOKEN_EVENT"); - case(GM_NEW_ALARM_VIOLATION_EVENT): - return("GM_NEW_ALARM_VIOLATION_EVENT"); - case(GM_NEW_RECV_EVENT): - return("GM_NEW_RECV_EVENT"); - case(GM_NEW_HIGH_RECV_EVENT): - return("GM_NEW_HIGH_RECV_EVENT"); - case(GM_NEW_PEER_RECV_EVENT): - return("GM_NEW_PEER_RECV_EVENT"); - case(GM_NEW_HIGH_PEER_RECV_EVENT): - return("GM_NEW_HIGH_PEER_RECV_EVENT"); - case(GM_NEW_FAST_RECV_EVENT): - return("GM_NEW_FAST_RECV_EVENT"); - case(GM_NEW_FAST_HIGH_RECV_EVENT): - return("GM_NEW_FAST_HIGH_RECV_EVENT"); - case(GM_NEW_FAST_PEER_RECV_EVENT): - return("GM_NEW_FAST_PEER_RECV_EVENT"); - case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): - return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); - case(GM_NEW_REJECTED_SEND_EVENT): - return("GM_NEW_REJECTED_SEND_EVENT"); - case(GM_NEW_ORPHANED_SEND_EVENT): - return("GM_NEW_ORPHANED_SEND_EVENT"); - case(_GM_NEW_PUT_NOTIFICATION_EVENT): - return("_GM_NEW_PUT_NOTIFICATION_EVENT"); - case(GM_NEW_FREE_SEND_TOKEN_EVENT): - return("GM_NEW_FREE_SEND_TOKEN_EVENT"); - case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): - return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); - case(GM_NEW_BAD_RESEND_DETECTED_EVENT): - return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); - case(GM_NEW_DROPPED_SEND_EVENT): - 
return("GM_NEW_DROPPED_SEND_EVENT"); - case(GM_NEW_BAD_SEND_VMA_EVENT): - return("GM_NEW_BAD_SEND_VMA_EVENT"); - case(GM_NEW_BAD_RECV_VMA_EVENT): - return("GM_NEW_BAD_RECV_VMA_EVENT"); - case(_GM_NEW_FLUSHED_ALARM_EVENT): - return("GM_NEW_FLUSHED_ALARM_EVENT"); - case(GM_NEW_SENT_TOKENS_EVENT): - return("GM_NEW_SENT_TOKENS_EVENT"); - case(GM_NEW_IGNORE_RECV_EVENT): - return("GM_NEW_IGNORE_RECV_EVENT"); - case(GM_NEW_ETHERNET_RECV_EVENT): - return("GM_NEW_ETHERNET_RECV_EVENT"); - default: - return("Unknown Recv event"); - /* _GM_PUT_NOTIFICATION_EVENT */ - /* GM_FREE_SEND_TOKEN_EVENT */ - /* GM_FREE_HIGH_SEND_TOKEN_EVENT */ - } -} - - -void -gmnal_yield(int delay) -{ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(delay); -} diff --git a/lnet/klnds/iiblnd/.cvsignore b/lnet/klnds/iiblnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/iiblnd/Makefile.in b/lnet/klnds/iiblnd/Makefile.in deleted file mode 100644 index 7ee9b6444ab92fa63b1edaa50d5c7c58dfd1c93c..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kiiblnd -kiiblnd-objs := iiblnd.o iiblnd_cb.o iiblnd_modparams.o - -EXTRA_POST_CFLAGS := @IIBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/iiblnd/autoMakefile.am b/lnet/klnds/iiblnd/autoMakefile.am deleted file mode 100644 index d08d07973f1ae42294ad601e43b046ed3073d447..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_IIBLND -modulenet_DATA = kiiblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kiiblnd-objs:%.o=%.c) iiblnd.h diff --git a/lnet/klnds/iiblnd/iiblnd.c b/lnet/klnds/iiblnd/iiblnd.c deleted file mode 100644 index 6e1889f98b871f723966ada14aa62575461221b1..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd.c +++ /dev/null @@ -1,2155 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "iiblnd.h" - -lnd_t the_kiblnd = { - .lnd_type = IIBLND, - .lnd_startup = kibnal_startup, - .lnd_shutdown = kibnal_shutdown, - .lnd_ctl = kibnal_ctl, - .lnd_send = kibnal_send, - .lnd_recv = kibnal_recv, - .lnd_eager_recv = kibnal_eager_recv, -}; - -kib_data_t kibnal_data; - -__u32 -kibnal_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -void -kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq) -{ - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBNAL_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid, - dstnid); - msg->ibm_srcstamp = kibnal_data.kib_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - msg->ibm_seq = seq; - - if (*kibnal_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob); - } -} - -void -kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob, - int type, lnet_nid_t dstnid, __u64 dststamp) -{ - LASSERT (nob >= offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)); - - memset(msg, 0, nob); - kibnal_init_msg(msg, type, sizeof(kib_connparams_t)); - - msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - msg->ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE; - msg->ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS; - - kibnal_pack_msg(msg, version, 0, dstnid, dststamp, 0); -} - -int -kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - __u32 msg_version; - int flip; - int msg_nob; -#if !IBNAL_USE_FMR - int i; - int n; -#endif - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - /* Future protocol version compatibility support! 
- * If the iiblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will negotiate a - * protocol version. If I find this, I avoid any console errors. If - * my is doing connection establishment, the reject will tell the peer - * which version I'm running. */ - - if (msg->ibm_magic == IBNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) { - flip = 1; - } else { - if (msg->ibm_magic == LNET_PROTO_MAGIC || - msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) - return -EPROTO; - - /* Completely out to lunch */ - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version; - if (expected_version == 0) { - if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - msg_version != IBNAL_MSG_VERSION) - return -EPROTO; - } else if (msg_version != expected_version) { - CERROR("Bad version: %x(%x expected)\n", - msg_version, expected_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? 
__swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kibnal_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = msg_version; - CLASSERT (sizeof(msg->ibm_type) == 1); - CLASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - __swab64s(&msg->ibm_seq); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) { - CERROR("Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putreq))); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_ACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putack))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag); - } - - n = msg->ibm_u.putack.ibpam_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", - n, 
IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) { - for (i = 0; i < n; i++) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr); - } - } -#endif - break; - - case IBNAL_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.get))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag); - } - - n = msg->ibm_u.get.ibgm_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", - n, IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) - for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr); - } -#endif - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - - case IBNAL_MSG_CONNREQ: - case IBNAL_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short 
connreq/ack: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - __swab32s(&msg->ibm_u.connparams.ibcp_max_frags); - } - break; - } - return 0; -} - -IB_HANDLE -kibnal_create_cep(lnet_nid_t nid) -{ - FSTATUS frc; - __u32 u32val; - IB_HANDLE cep; - - cep = iba_cm_create_cep(CM_RC_TYPE); - if (cep == NULL) { - CERROR ("Can't create CEP for %s\n", - (nid == LNET_NID_ANY) ? "listener" : - libcfs_nid2str(nid)); - return NULL; - } - - if (nid == LNET_NID_ANY) { - u32val = 1; - frc = iba_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT, - (char *)&u32val, sizeof(u32val), 0); - if (frc != FSUCCESS) { - CERROR("Can't set async_accept: %d\n", frc); - goto failed; - } - - u32val = 0; /* sets system max */ - frc = iba_cm_modify_cep(cep, CM_FLAG_LISTEN_BACKLOG, - (char *)&u32val, sizeof(u32val), 0); - if (frc != FSUCCESS) { - CERROR("Can't set listen backlog: %d\n", frc); - goto failed; - } - } - - u32val = 1; - frc = iba_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK, - (char *)&u32val, sizeof(u32val), 0); - if (frc != FSUCCESS) { - CERROR("Can't set timewait_callback for %s: %d\n", - (nid == LNET_NID_ANY) ? 
"listener" : - libcfs_nid2str(nid), frc); - goto failed; - } - - return cep; - - failed: - iba_cm_destroy_cep(cep); - return NULL; -} - -#define IBNAL_CHECK_ADVERT 1 -#if IBNAL_CHECK_ADVERT -void -kibnal_service_query_done (void *arg, QUERY *qry, - QUERY_RESULT_VALUES *qry_result) -{ - int *rcp = arg; - FSTATUS frc = qry_result->Status; - SERVICE_RECORD_RESULTS *svc_rslt; - IB_SERVICE_RECORD *svc; - lnet_nid_t nid; - - if (frc != FSUCCESS || qry_result->ResultDataSize == 0) { - CERROR("Error checking advert: status %d data size %d\n", - frc, qry_result->ResultDataSize); - *rcp = -EIO; - goto out; - } - - svc_rslt = (SERVICE_RECORD_RESULTS *)qry_result->QueryResult; - - if (svc_rslt->NumServiceRecords < 1) { - CERROR("Check advert: %d records\n", - svc_rslt->NumServiceRecords); - *rcp = -ENOENT; - goto out; - } - - svc = &svc_rslt->ServiceRecords[0]; - nid = le64_to_cpu(*kibnal_service_nid_field(svc)); - - CDEBUG(D_NET, "Check advert: %s "LPX64" "LPX64":%04x\n", - libcfs_nid2str(nid), svc->RID.ServiceID, - svc->RID.ServiceGID.Type.Global.InterfaceID, - svc->RID.ServiceP_Key); - - if (nid != kibnal_data.kib_ni->ni_nid) { - CERROR("Check advert: Bad NID %s (%s expected)\n", - libcfs_nid2str(nid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - *rcp = -EINVAL; - goto out; - } - - if (svc->RID.ServiceID != *kibnal_tunables.kib_service_number) { - CERROR("Check advert: Bad ServiceID "LPX64" (%x expected)\n", - svc->RID.ServiceID, - *kibnal_tunables.kib_service_number); - *rcp = -EINVAL; - goto out; - } - - if (svc->RID.ServiceGID.Type.Global.InterfaceID != - kibnal_data.kib_port_guid) { - CERROR("Check advert: Bad GUID "LPX64" ("LPX64" expected)\n", - svc->RID.ServiceGID.Type.Global.InterfaceID, - kibnal_data.kib_port_guid); - *rcp = -EINVAL; - goto out; - } - - if (svc->RID.ServiceP_Key != kibnal_data.kib_port_pkey) { - CERROR("Check advert: Bad PKEY %04x (%04x expected)\n", - svc->RID.ServiceP_Key, kibnal_data.kib_port_pkey); - *rcp = -EINVAL; - goto out; - } - - 
CDEBUG(D_NET, "Check advert OK\n"); - *rcp = 0; - - out: - up (&kibnal_data.kib_listener_signal); -} - -int -kibnal_check_advert (void) -{ - /* single-threaded */ - static QUERY qry; - - FSTATUS frc; - int rc; - - memset (&qry, 0, sizeof(qry)); - qry.InputType = InputTypeServiceRecord; - qry.OutputType = OutputTypeServiceRecord; - kibnal_set_service_keys(&qry.InputValue.ServiceRecordValue.ServiceRecord, - kibnal_data.kib_ni->ni_nid); - qry.InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK; - - frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - &qry, - kibnal_service_query_done, - &kibnal_data.kib_sdretry, - &rc); - if (frc != FPENDING) { - CERROR ("Immediate error %d checking SM service\n", frc); - return -EIO; - } - - down (&kibnal_data.kib_listener_signal); - - if (rc != 0) - CERROR ("Error %d checking SM service\n", rc); - return rc; -} -#else -int -kibnal_check_advert(void) -{ - return 0; -} -#endif - -void -kibnal_fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type) -{ - IB_SERVICE_RECORD *svc; - - memset (fod, 0, sizeof(*fod)); - fod->Type = type; - - svc = &fod->Value.ServiceRecordValue.ServiceRecord; - svc->RID.ServiceID = *kibnal_tunables.kib_service_number; - svc->RID.ServiceGID.Type.Global.InterfaceID = kibnal_data.kib_port_guid; - svc->RID.ServiceGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX; - svc->RID.ServiceP_Key = kibnal_data.kib_port_pkey; - svc->ServiceLease = 0xffffffff; - - kibnal_set_service_keys(svc, kibnal_data.kib_ni->ni_nid); -} - -void -kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod, - FSTATUS frc, uint32 madrc) -{ - *(FSTATUS *)arg = frc; - up (&kibnal_data.kib_listener_signal); -} - -int -kibnal_advertise (void) -{ - /* Single threaded here */ - static FABRIC_OPERATION_DATA fod; - - IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord; - FSTATUS frc; - FSTATUS frc2; - - if (strlen(*kibnal_tunables.kib_service_name) >= - 
sizeof(svc->ServiceName)) { - CERROR("Service name '%s' too long (%d chars max)\n", - *kibnal_tunables.kib_service_name, - (int)sizeof(svc->ServiceName) - 1); - return -EINVAL; - } - - kibnal_fill_fod(&fod, FabOpSetServiceRecord); - - CDEBUG(D_NET, "Advertising service id "LPX64" %s:%s\n", - svc->RID.ServiceID, svc->ServiceName, - libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc)))); - - frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - &fod, - kibnal_service_setunset_done, - &kibnal_data.kib_sdretry, - &frc2); - - if (frc != FSUCCESS && frc != FPENDING) { - CERROR ("Immediate error %d advertising NID %s\n", - frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - return -EIO; - } - - down (&kibnal_data.kib_listener_signal); - - frc = frc2; - if (frc == FSUCCESS) - return 0; - - CERROR ("Error %d advertising %s\n", - frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - return -EIO; -} - -void -kibnal_unadvertise (int expect_success) -{ - /* single threaded */ - static FABRIC_OPERATION_DATA fod; - - IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord; - FSTATUS frc; - FSTATUS frc2; - - LASSERT (kibnal_data.kib_ni->ni_nid != LNET_NID_ANY); - - kibnal_fill_fod(&fod, FabOpDeleteServiceRecord); - - CDEBUG(D_NET, "Unadvertising service %s:%s\n", - svc->ServiceName, - libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc)))); - - frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - &fod, - kibnal_service_setunset_done, - &kibnal_data.kib_sdretry, - &frc2); - if (frc != FSUCCESS && frc != FPENDING) { - CERROR ("Immediate error %d unadvertising NID %s\n", - frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - return; - } - - down (&kibnal_data.kib_listener_signal); - - CDEBUG(D_NET, "Unadvertise rc: %d\n", frc2); - - if ((frc2 == FSUCCESS) == !!expect_success) - return; - - if (expect_success) - CERROR("Error %d unadvertising NID %s\n", - frc2, 
libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - else - CWARN("Removed conflicting NID %s\n", - libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); -} - -void -kibnal_stop_listener(int normal_shutdown) -{ - /* NB this also disables peer creation and destroys all existing - * peers */ - IB_HANDLE cep = kibnal_data.kib_listener_cep; - unsigned long flags; - FSTATUS frc; - - LASSERT (cep != NULL); - - kibnal_unadvertise(normal_shutdown); - - frc = iba_cm_cancel(cep); - if (frc != FSUCCESS && frc != FPENDING) - CERROR ("Error %d stopping listener\n", frc); - - down(&kibnal_data.kib_listener_signal); - - frc = iba_cm_destroy_cep(cep); - if (frc != FSUCCESS) - CERROR ("Error %d destroying listener CEP\n", frc); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* This assignment disables peer creation */ - kibnal_data.kib_listener_cep = NULL; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* Start to tear down any peers created while the listener was - * running */ - kibnal_del_peer(LNET_NID_ANY); -} - -int -kibnal_start_listener(void) -{ - /* NB this also enables peer creation */ - - IB_HANDLE cep; - CM_LISTEN_INFO info; - unsigned long flags; - int rc; - FSTATUS frc; - - LASSERT (kibnal_data.kib_listener_cep == NULL); - init_MUTEX_LOCKED (&kibnal_data.kib_listener_signal); - - cep = kibnal_create_cep(LNET_NID_ANY); - if (cep == NULL) - return -ENOMEM; - - memset (&info, 0, sizeof(info)); - info.ListenAddr.EndPt.SID = *kibnal_tunables.kib_service_number; - - frc = iba_cm_listen(cep, &info, kibnal_listen_callback, NULL); - if (frc != FSUCCESS && frc != FPENDING) { - CERROR ("iba_cm_listen error: %d\n", frc); - - iba_cm_destroy_cep(cep); - return -EIO; - } - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* This assignment enables peer creation */ - kibnal_data.kib_listener_cep = cep; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - rc = kibnal_advertise(); - if (rc == 0) - rc = kibnal_check_advert(); - - if (rc == 
0) - return 0; - - kibnal_stop_listener(0); - return rc; -} - -int -kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) -{ - kib_peer_t *peer; - unsigned long flags; - int rc; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC (peer, sizeof (*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_nid = nid; - atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD (&peer->ibp_conns); - INIT_LIST_HEAD (&peer->ibp_tx_queue); - - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - peer->ibp_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (atomic_read(&kibnal_data.kib_npeers) >= - *kibnal_tunables.kib_concurrent_peers) { - rc = -EOVERFLOW; /* !! but at least it distinguishes */ - } else if (kibnal_data.kib_listener_cep == NULL) { - rc = -ESHUTDOWN; /* shutdown has started */ - } else { - rc = 0; - /* npeers only grows with the global lock held */ - atomic_inc(&kibnal_data.kib_npeers); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Can't create peer: %s\n", - (rc == -ESHUTDOWN) ? 
"shutting down" : - "too many peers"); - LIBCFS_FREE(peer, sizeof(*peer)); - } else { - *peerp = peer; - } - - return rc; -} - -void -kibnal_destroy_peer (kib_peer_t *peer) -{ - - LASSERT (atomic_read (&peer->ibp_refcount) == 0); - LASSERT (peer->ibp_persistence == 0); - LASSERT (!kibnal_peer_active(peer)); - LASSERT (!kibnal_peer_connecting(peer)); - LASSERT (list_empty (&peer->ibp_conns)); - LASSERT (list_empty (&peer->ibp_tx_queue)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. */ - atomic_dec (&kibnal_data.kib_npeers); -} - -/* the caller is responsible for accounting for the additional reference - * that this creates */ -kib_peer_t * -kibnal_find_peer_locked (lnet_nid_t nid) -{ - struct list_head *peer_list = kibnal_nid2peerlist (nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer %s (%d)\n", - libcfs_nid2str(nid), atomic_read (&peer->ibp_refcount)); - return (peer); - } - return (NULL); -} - -void -kibnal_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (peer->ibp_persistence == 0); - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kibnal_peer_active(peer)); - list_del_init (&peer->ibp_list); - /* lose peerlist's ref */ - kibnal_peer_decref(peer); -} - -int -kibnal_get_peer_info (int index, lnet_nid_t *nidp, int *persistencep) -{ - kib_peer_t *peer; - struct list_head *ptmp; - unsigned long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer 
= list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *persistencep = peer->ibp_persistence; - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (0); - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (-ENOENT); -} - -int -kibnal_add_persistent_peer (lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - kib_peer_t *peer2; - int rc; - - if (nid == LNET_NID_ANY) - return (-EINVAL); - - rc = kibnal_create_peer(&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - /* I'm always called with a reference on kibnal_data.kib_ni - * so shutdown can't have started */ - LASSERT (kibnal_data.kib_listener_cep != NULL); - - peer2 = kibnal_find_peer_locked (nid); - if (peer2 != NULL) { - kibnal_peer_decref (peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist (nid)); - } - - peer->ibp_persistence++; - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return (0); -} - -void -kibnal_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - peer->ibp_persistence = 0; - - if (list_empty(&peer->ibp_conns)) { - kibnal_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kibnal_close_conn_locked (conn, 0); - } - /* NB peer is no longer persistent; closing its last conn - * unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. 
*/ -} - -int -kibnal_del_peer (lnet_nid_t nid) -{ - unsigned long flags; - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kibnal_del_peer_locked (peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_txlist_done(&zombies, -EIO); - - return (rc); -} - -kib_conn_t * -kibnal_get_conn_by_idx (int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, kib_conn_t, ibc_list); - kibnal_conn_addref(conn); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (conn); - } - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (NULL); -} - -int -kibnal_conn_rts(kib_conn_t *conn, 
- __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn) -{ - IB_PATH_RECORD *path = &conn->ibc_cvars->cv_path; - IB_HANDLE qp = conn->ibc_qp; - IB_QP_ATTRIBUTES_MODIFY modify_attr; - FSTATUS frc; - int rc; - - if (resp_res > kibnal_data.kib_hca_attrs.MaxQPResponderResources) - resp_res = kibnal_data.kib_hca_attrs.MaxQPResponderResources; - - if (init_depth > kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth) - init_depth = kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth; - - modify_attr = (IB_QP_ATTRIBUTES_MODIFY) { - .RequestState = QPStateReadyToRecv, - .RecvPSN = IBNAL_STARTING_PSN, - .DestQPNumber = qpn, - .ResponderResources = resp_res, - .MinRnrTimer = UsecToRnrNakTimer(2000), /* 20 ms */ - .Attrs = (IB_QP_ATTR_RECVPSN | - IB_QP_ATTR_DESTQPNUMBER | - IB_QP_ATTR_RESPONDERRESOURCES | - IB_QP_ATTR_DESTAV | - IB_QP_ATTR_PATHMTU | - IB_QP_ATTR_MINRNRTIMER), - }; - GetAVFromPath(0, path, &modify_attr.PathMTU, NULL, - &modify_attr.DestAV); - - frc = iba_modify_qp(qp, &modify_attr, NULL); - if (frc != FSUCCESS) { - CERROR("Can't set QP %s ready to receive: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - return -EIO; - } - - rc = kibnal_post_receives(conn); - if (rc != 0) { - CERROR("Can't post receives for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - return rc; - } - - modify_attr = (IB_QP_ATTRIBUTES_MODIFY) { - .RequestState = QPStateReadyToSend, - .FlowControl = TRUE, - .InitiatorDepth = init_depth, - .SendPSN = psn, - .LocalAckTimeout = path->PktLifeTime + 2, /* 2 or 1? 
*/ - .RetryCount = IBNAL_RETRY, - .RnrRetryCount = IBNAL_RNR_RETRY, - .Attrs = (IB_QP_ATTR_FLOWCONTROL | - IB_QP_ATTR_INITIATORDEPTH | - IB_QP_ATTR_SENDPSN | - IB_QP_ATTR_LOCALACKTIMEOUT | - IB_QP_ATTR_RETRYCOUNT | - IB_QP_ATTR_RNRRETRYCOUNT), - }; - - frc = iba_modify_qp(qp, &modify_attr, NULL); - if (frc != FSUCCESS) { - CERROR("Can't set QP %s ready to send: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - return -EIO; - } - - frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't query QP %s attributes: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - return -EIO; - } - - return 0; -} - -kib_conn_t * -kibnal_create_conn (lnet_nid_t nid, int proto_version) -{ - kib_conn_t *conn; - int i; - int page_offset; - int ipage; - int rc; - FSTATUS frc; - union { - IB_QP_ATTRIBUTES_CREATE qp_create; - IB_QP_ATTRIBUTES_MODIFY qp_attr; - } params; - - LIBCFS_ALLOC (conn, sizeof (*conn)); - if (conn == NULL) { - CERROR ("Can't allocate connection for %s\n", - libcfs_nid2str(nid)); - return (NULL); - } - - /* zero flags, NULL pointers etc... 
*/ - memset (conn, 0, sizeof (*conn)); - conn->ibc_state = IBNAL_CONN_INIT_NOTHING; - conn->ibc_version = proto_version; - - INIT_LIST_HEAD (&conn->ibc_early_rxs); - INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD (&conn->ibc_tx_queue); - INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD (&conn->ibc_active_txs); - spin_lock_init (&conn->ibc_lock); - - atomic_inc (&kibnal_data.kib_nconns); - /* well not really, but I call destroy() on failure, which decrements */ - - LIBCFS_ALLOC(conn->ibc_cvars, sizeof (*conn->ibc_cvars)); - if (conn->ibc_cvars == NULL) { - CERROR ("Can't allocate connvars for %s\n", - libcfs_nid2str(nid)); - goto failed; - } - memset(conn->ibc_cvars, 0, sizeof (*conn->ibc_cvars)); - - LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t)); - if (conn->ibc_rxs == NULL) { - CERROR("Cannot allocate RX descriptors for %s\n", - libcfs_nid2str(nid)); - goto failed; - } - memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES); - if (rc != 0) { - CERROR("Can't allocate RX buffers for %s\n", - libcfs_nid2str(nid)); - goto failed; - } - - for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - rx->rx_hca_msg = kibnal_data.kib_whole_mem.md_addr + - lnet_page2phys(page) + page_offset; - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_RX_MSG_PAGES); - } - } - - params.qp_create = (IB_QP_ATTRIBUTES_CREATE) { - .Type = QPTypeReliableConnected, - .SendQDepth = (1 + IBNAL_MAX_RDMA_FRAGS) * - (*kibnal_tunables.kib_concurrent_sends), - .RecvQDepth = IBNAL_RX_MSGS, - .SendDSListDepth = 1, - .RecvDSListDepth = 1, - .SendCQHandle = 
kibnal_data.kib_cq, - .RecvCQHandle = kibnal_data.kib_cq, - .PDHandle = kibnal_data.kib_pd, - .SendSignaledCompletions = TRUE, - }; - frc = iba_create_qp(kibnal_data.kib_hca, ¶ms.qp_create, NULL, - &conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs); - if (frc != 0) { - CERROR ("Can't create QP %s: %d\n", libcfs_nid2str(nid), frc); - goto failed; - } - - /* Mark QP created */ - kibnal_set_conn_state(conn, IBNAL_CONN_INIT_QP); - - params.qp_attr = (IB_QP_ATTRIBUTES_MODIFY) { - .RequestState = QPStateInit, - .Attrs = (IB_QP_ATTR_PORTGUID | - IB_QP_ATTR_PKEYINDEX | - IB_QP_ATTR_ACCESSCONTROL), - .PortGUID = kibnal_data.kib_port_guid, - .PkeyIndex = 0, - .AccessControl = { - .s = { - .RdmaWrite = 1, - .RdmaRead = 1, - }, - }, - }; - frc = iba_modify_qp(conn->ibc_qp, ¶ms.qp_attr, NULL); - if (frc != 0) { - CERROR ("Can't set QP %s state to INIT: %d\n", - libcfs_nid2str(nid), frc); - goto failed; - } - - frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't query QP %s attributes: %d\n", - libcfs_nid2str(nid), frc); - goto failed; - } - - /* 1 ref for caller */ - atomic_set (&conn->ibc_refcount, 1); - CDEBUG(D_NET, "New conn %p\n", conn); - return (conn); - - failed: - kibnal_destroy_conn (conn); - return (NULL); -} - -void -kibnal_destroy_conn (kib_conn_t *conn) -{ - FSTATUS frc; - - LASSERT (!in_interrupt()); - - CDEBUG (D_NET, "connection %s\n", - (conn->ibc_peer) == NULL ? 
"<ANON>" : - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - LASSERT (atomic_read (&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_early_rxs)); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT (list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - - switch (conn->ibc_state) { - case IBNAL_CONN_INIT_NOTHING: - case IBNAL_CONN_INIT_QP: - case IBNAL_CONN_DISCONNECTED: - break; - - default: - /* conn must either have never engaged with the CM, or have - * completely disengaged from it */ - CERROR("Bad conn %s state %d\n", - (conn->ibc_peer) == NULL ? "<anon>" : - libcfs_nid2str(conn->ibc_peer->ibp_nid), conn->ibc_state); - LBUG(); - } - - if (conn->ibc_cep != NULL) { - frc = iba_cm_destroy_cep(conn->ibc_cep); - if (frc != FSUCCESS) - CERROR("Error destroying CEP %p: %d\n", - conn->ibc_cep, frc); - } - - if (conn->ibc_qp != NULL) { - frc = iba_destroy_qp(conn->ibc_qp); - if (frc != FSUCCESS) - CERROR("Error destroying QP %p: %d\n", - conn->ibc_qp, frc); - } - - if (conn->ibc_rx_pages != NULL) - kibnal_free_pages(conn->ibc_rx_pages); - - if (conn->ibc_rxs != NULL) - LIBCFS_FREE(conn->ibc_rxs, - IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - if (conn->ibc_cvars != NULL) - LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars)); - - if (conn->ibc_peer != NULL) - kibnal_peer_decref(conn->ibc_peer); - - LIBCFS_FREE(conn, sizeof (*conn)); - - atomic_dec(&kibnal_data.kib_nconns); -} - -int -kibnal_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - count++; - kibnal_close_conn_locked (conn, why); - } - - return (count); -} - -int -kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; - struct list_head 
*ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn nid:%s incarnation:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kibnal_close_conn_locked (conn, -ESTALE); - } - - return (count); -} - -int -kibnal_close_matching_conns (lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kibnal_close_peer_conns_locked (peer, 0); - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return (0); - - return (count == 0 ? 
-ENOENT : 0); -} - -int -kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - ENTRY; - - LASSERT (ni == kibnal_data.kib_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int share_count = 0; - - rc = kibnal_get_peer_info(data->ioc_count, - &nid, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kibnal_add_persistent_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kibnal_del_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kibnal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kibnal_close_matching_conns (data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - if (ni->ni_nid == data->ioc_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - RETURN(rc); -} - -void -kibnal_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kibnal_alloc_pages (kib_pages_t **pp, int npages) -{ - kib_pages_t *p; - int i; - - LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { - CERROR ("Can't allocate buffer %d\n", npages); - return (-ENOMEM); - } - - memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_page (GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR ("Can't allocate page %d of %d\n", i, npages); - kibnal_free_pages(p); - return 
(-ENOMEM); - } - } - - *pp = p; - return (0); -} - -int -kibnal_alloc_tx_descs (void) -{ - int i; - - LIBCFS_ALLOC (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - if (kibnal_data.kib_tx_descs == NULL) - return -ENOMEM; - - memset(kibnal_data.kib_tx_descs, 0, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); - if (tx->tx_pages == NULL) - return -ENOMEM; -#else - LIBCFS_ALLOC(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - if (tx->tx_wrq == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - if (tx->tx_gl == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); - if (tx->tx_rd == NULL) - return -ENOMEM; -#endif - } - - return 0; -} - -void -kibnal_free_tx_descs (void) -{ - int i; - - if (kibnal_data.kib_tx_descs == NULL) - return; - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - if (tx->tx_pages != NULL) - LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); -#else - if (tx->tx_wrq != NULL) - LIBCFS_FREE(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - - if (tx->tx_gl != NULL) - LIBCFS_FREE(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - - if (tx->tx_rd != NULL) - LIBCFS_FREE(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); -#endif - } - - LIBCFS_FREE(kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); -} - -int -kibnal_setup_tx_descs (void) -{ - int ipage = 0; - int page_offset = 0; - struct page *page; - kib_tx_t *tx; - int i; - int rc; - - /* pre-mapped messages are not bigger than 1 page */ - CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer 
calculations */ - CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - - rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, - IBNAL_TX_MSG_PAGES()); - if (rc != 0) - return (rc); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - page = kibnal_data.kib_tx_pages->ibp_pages[ipage]; - tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - /* Allocate an FMR for this TX so it can map src/sink buffers - * for large transfers */ -#endif - tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - tx->tx_hca_msg = kibnal_data.kib_whole_mem.md_addr + - lnet_page2phys(page) + page_offset; - - CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", - i, tx, tx->tx_msg, tx->tx_hca_msg); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_TX_MSG_PAGES()); - } - } - - return (0); -} - -int -kibnal_register_all_memory(void) -{ - /* CAVEAT EMPTOR: this assumes all physical memory is in 1 contiguous - * chunk starting at 0 */ - struct sysinfo si; - __u64 total; - __u64 total2; - __u64 roundup = (128<<20); /* round up in big chunks */ - IB_MR_PHYS_BUFFER phys; - IB_ACCESS_CONTROL access; - FSTATUS frc; - - memset(&access, 0, sizeof(access)); - access.s.MWBindable = 1; - access.s.LocalWrite = 1; - access.s.RdmaRead = 1; - access.s.RdmaWrite = 1; - - /* XXX we don't bother with first-gen cards */ - if (kibnal_data.kib_hca_attrs.VendorId == 0xd0b7 && - kibnal_data.kib_hca_attrs.DeviceId == 0x3101) { - CERROR("Can't register all memory on first generation HCAs\n"); - return -EINVAL; - } - - si_meminfo(&si); - - CDEBUG(D_NET, "si_meminfo: %lu/%u, num_physpages %lu/%lu\n", - si.totalram, si.mem_unit, num_physpages, PAGE_SIZE); - - total = ((__u64)si.totalram) * si.mem_unit; - total2 = num_physpages * PAGE_SIZE; - if (total < total2) - total = total2; - - if (total == 0) { - CERROR("Can't determine memory size\n"); - return 
-ENOMEM; - } - - roundup = (128<<20); - total = (total + (roundup - 1)) & ~(roundup - 1); - - phys.PhysAddr = 0; - phys.Length = total; - - frc = iba_register_contig_pmr(kibnal_data.kib_hca, 0, &phys, 1, 0, - kibnal_data.kib_pd, access, - &kibnal_data.kib_whole_mem.md_handle, - &kibnal_data.kib_whole_mem.md_addr, - &kibnal_data.kib_whole_mem.md_lkey, - &kibnal_data.kib_whole_mem.md_rkey); - - if (frc != FSUCCESS) { - CERROR("registering physical memory failed: %d\n", frc); - return -EIO; - } - - CDEBUG(D_WARNING, "registered phys mem from 0("LPX64") for "LPU64"("LPU64") -> "LPX64"\n", - phys.PhysAddr, total, phys.Length, kibnal_data.kib_whole_mem.md_addr); - - return 0; -} - -void -kibnal_shutdown (lnet_ni_t *ni) -{ - int i; - int rc; - - LASSERT (ni == kibnal_data.kib_ni); - LASSERT (ni->ni_data == &kibnal_data); - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - switch (kibnal_data.kib_init) { - default: - CERROR ("Unexpected state %d\n", kibnal_data.kib_init); - LBUG(); - - case IBNAL_INIT_ALL: - /* stop accepting connections, prevent new peers and start to - * tear down all existing ones... */ - kibnal_stop_listener(1); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read (&kibnal_data.kib_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d peers to disconnect\n", - atomic_read (&kibnal_data.kib_npeers)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - /* fall through */ - - case IBNAL_INIT_CQ: - rc = iba_destroy_cq(kibnal_data.kib_cq); - if (rc != 0) - CERROR ("Destroy CQ error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_TXD: - kibnal_free_pages (kibnal_data.kib_tx_pages); - /* fall through */ - - case IBNAL_INIT_MD: - rc = iba_deregister_mr(kibnal_data.kib_whole_mem.md_handle); - if (rc != FSUCCESS) - CERROR ("Deregister memory: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_PD: - rc = iba_free_pd(kibnal_data.kib_pd); - if (rc != 0) - CERROR ("Destroy PD error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_SD: - rc = iba_sd_deregister(kibnal_data.kib_sd); - if (rc != 0) - CERROR ("Deregister SD error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_PORTATTRS: - LIBCFS_FREE(kibnal_data.kib_hca_attrs.PortAttributesList, - kibnal_data.kib_hca_attrs.PortAttributesListSize); - /* fall through */ - - case IBNAL_INIT_HCA: - rc = iba_close_ca(kibnal_data.kib_hca); - if (rc != 0) - CERROR ("Close HCA error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_DATA: - LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0); - LASSERT (kibnal_data.kib_peers != NULL); - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - LASSERT (list_empty (&kibnal_data.kib_peers[i])); - } - LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0); - LASSERT (list_empty (&kibnal_data.kib_connd_zombies)); - LASSERT (list_empty (&kibnal_data.kib_connd_conns)); - LASSERT (list_empty (&kibnal_data.kib_connd_peers)); - - /* flag threads to terminate; wake and wait for them to die */ - kibnal_data.kib_shutdown = 1; - wake_up_all (&kibnal_data.kib_sched_waitq); - wake_up_all (&kibnal_data.kib_connd_waitq); - - i = 2; - while (atomic_read (&kibnal_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d threads to terminate\n", - atomic_read (&kibnal_data.kib_nthreads)); - set_current_state (TASK_INTERRUPTIBLE); - schedule_timeout (HZ); - } - /* fall through */ - - case IBNAL_INIT_NOTHING: - break; - } - - kibnal_free_tx_descs(); - - if (kibnal_data.kib_peers != NULL) - LIBCFS_FREE (kibnal_data.kib_peers, - sizeof (struct list_head) * - kibnal_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - kibnal_data.kib_init = IBNAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kibnal_get_ipif_name(char *ifname, int ifname_size, int idx) -{ - char *basename = *kibnal_tunables.kib_ipif_basename; - int n = strlen(basename); - int baseidx; - int m; - - if (n == 0) { /* empty string */ - CERROR("Empty IP interface basename specified\n"); - return -EINVAL; - } - - for (m = n; m > 0; m--) /* find max numeric postfix */ - if (sscanf(basename + m - 1, "%d", &baseidx) != 1) - break; - - if (m == 0) /* just a number */ - m = n; - - if (m == n) /* no postfix */ - baseidx = 1; /* default to 1 */ - - if (m >= ifname_size) - m = ifname_size - 1; - - memcpy(ifname, basename, m); /* copy prefix name */ - - snprintf(ifname + m, ifname_size - m, "%d", baseidx + idx); - - if (strlen(ifname) == ifname_size - 1) { - CERROR("IP interface basename %s too long\n", basename); - return -EINVAL; - } - - return 0; -} - -int -kibnal_startup (lnet_ni_t *ni) -{ - char ipif_name[32]; - __u32 ip; - __u32 netmask; - int up; - int nob; - struct timeval tv; - IB_PORT_ATTRIBUTES *pattr; - FSTATUS frc; - int rc; - __u32 n; - int i; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - /* Only 1 instance supported */ - if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kibnal_tunables.kib_credits, - *kibnal_tunables.kib_ntx); - return -EINVAL; - } - - 
ni->ni_maxtxcredits = *kibnal_tunables.kib_credits; - ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits; - - CLASSERT (LNET_MAX_INTERFACES > 1); - - if (ni->ni_interfaces[0] == NULL) { - kibnal_data.kib_hca_idx = 0; - } else { - /* Use the HCA specified in 'networks=' */ - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - return -EPERM; - } - - /* Parse <number> into kib_hca_idx */ - nob = strlen(ni->ni_interfaces[0]); - if (sscanf(ni->ni_interfaces[0], "%d%n", - &kibnal_data.kib_hca_idx, &nob) < 1 || - nob != strlen(ni->ni_interfaces[0])) { - CERROR("Can't parse interface '%s'\n", - ni->ni_interfaces[0]); - return -EINVAL; - } - } - - rc = kibnal_get_ipif_name(ipif_name, sizeof(ipif_name), - kibnal_data.kib_hca_idx); - if (rc != 0) - return rc; - - rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc); - return -ENETDOWN; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name); - return -ENETDOWN; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip); - - ni->ni_data = &kibnal_data; - kibnal_data.kib_ni = ni; - - do_gettimeofday(&tv); - kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - PORTAL_MODULE_USE; - - rwlock_init(&kibnal_data.kib_global_lock); - - kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (kibnal_data.kib_peers, - sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); - if (kibnal_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kibnal_data.kib_peers[i]); - - spin_lock_init (&kibnal_data.kib_connd_lock); - INIT_LIST_HEAD (&kibnal_data.kib_connd_peers); - INIT_LIST_HEAD (&kibnal_data.kib_connd_conns); - INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies); - init_waitqueue_head (&kibnal_data.kib_connd_waitq); - - spin_lock_init (&kibnal_data.kib_sched_lock); - 
init_waitqueue_head (&kibnal_data.kib_sched_waitq); - - spin_lock_init (&kibnal_data.kib_tx_lock); - INIT_LIST_HEAD (&kibnal_data.kib_idle_txs); - - rc = kibnal_alloc_tx_descs(); - if (rc != 0) { - CERROR("Can't allocate tx descs\n"); - goto failed; - } - - /* lists/ptrs/locks initialised */ - kibnal_data.kib_init = IBNAL_INIT_DATA; - /*****************************************************/ - - kibnal_data.kib_sdretry.RetryCount = *kibnal_tunables.kib_sd_retries; - kibnal_data.kib_sdretry.Timeout = (*kibnal_tunables.kib_timeout * 1000)/ - *kibnal_tunables.kib_sd_retries; - - for (i = 0; i < IBNAL_N_SCHED; i++) { - rc = kibnal_thread_start (kibnal_scheduler, - (void *)(unsigned long)i); - if (rc != 0) { - CERROR("Can't spawn iib scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kibnal_thread_start (kibnal_connd, NULL); - if (rc != 0) { - CERROR ("Can't spawn iib connd: %d\n", rc); - goto failed; - } - - n = sizeof(kibnal_data.kib_hca_guids) / - sizeof(kibnal_data.kib_hca_guids[0]); - frc = iba_get_caguids(&n, kibnal_data.kib_hca_guids); - if (frc != FSUCCESS) { - CERROR ("Can't get HCA guids: %d\n", frc); - goto failed; - } - - if (n == 0) { - CERROR ("No HCAs found\n"); - goto failed; - } - - if (n <= kibnal_data.kib_hca_idx) { - CERROR("Invalid HCA %d requested: (must be 0 - %d inclusive)\n", - kibnal_data.kib_hca_idx, n - 1); - goto failed; - } - - /* Infinicon has per-HCA notification callbacks */ - frc = iba_open_ca(kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx], - kibnal_hca_callback, - kibnal_hca_async_callback, - NULL, - &kibnal_data.kib_hca); - if (frc != FSUCCESS) { - CERROR ("Can't open HCA[%d]: %d\n", - kibnal_data.kib_hca_idx, frc); - goto failed; - } - - /* Channel Adapter opened */ - kibnal_data.kib_init = IBNAL_INIT_HCA; - /*****************************************************/ - - kibnal_data.kib_hca_attrs.PortAttributesList = NULL; - kibnal_data.kib_hca_attrs.PortAttributesListSize = 0; - frc = iba_query_ca(kibnal_data.kib_hca, - 
&kibnal_data.kib_hca_attrs, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't size port attrs: %d\n", frc); - goto failed; - } - - LIBCFS_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList, - kibnal_data.kib_hca_attrs.PortAttributesListSize); - if (kibnal_data.kib_hca_attrs.PortAttributesList == NULL) - goto failed; - - /* Port attrs allocated */ - kibnal_data.kib_init = IBNAL_INIT_PORTATTRS; - /*****************************************************/ - - frc = iba_query_ca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs, - NULL); - if (frc != FSUCCESS) { - CERROR ("Can't get port attrs for HCA %d: %d\n", - kibnal_data.kib_hca_idx, frc); - goto failed; - } - - for (i = 0, pattr = kibnal_data.kib_hca_attrs.PortAttributesList; - pattr != NULL; - i++, pattr = pattr->Next) { - switch (pattr->PortState) { - default: - CERROR("Unexpected port[%d] state %d\n", - i, pattr->PortState); - continue; - case PortStateDown: - CDEBUG(D_NET, "port[%d] Down\n", i); - continue; - case PortStateInit: - CDEBUG(D_NET, "port[%d] Init\n", i); - continue; - case PortStateArmed: - CDEBUG(D_NET, "port[%d] Armed\n", i); - continue; - - case PortStateActive: - CDEBUG(D_NET, "port[%d] Active\n", i); - kibnal_data.kib_port = i; - kibnal_data.kib_port_guid = pattr->GUID; - kibnal_data.kib_port_pkey = pattr->PkeyTable[0]; - break; - } - break; - } - - if (pattr == NULL) { - CERROR ("Can't find an active port\n"); - goto failed; - } - - CDEBUG(D_NET, "got guid "LPX64"\n", kibnal_data.kib_port_guid); - - frc = iba_sd_register(&kibnal_data.kib_sd, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't register with SD: %d\n", frc); - goto failed; - } - - /* Registered with SD OK */ - kibnal_data.kib_init = IBNAL_INIT_SD; - /*****************************************************/ - - frc = iba_alloc_pd(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd); - if (frc != FSUCCESS) { - CERROR ("Can't create PD: %d\n", rc); - goto failed; - } - - /* flag PD initialised */ - kibnal_data.kib_init = IBNAL_INIT_PD; - 
/*****************************************************/ - - rc = kibnal_register_all_memory(); - if (rc != 0) { - CERROR ("Can't register all memory\n"); - goto failed; - } - - /* flag whole memory MD initialised */ - kibnal_data.kib_init = IBNAL_INIT_MD; - /*****************************************************/ - - rc = kibnal_setup_tx_descs(); - if (rc != 0) { - CERROR ("Can't register tx descs: %d\n", rc); - goto failed; - } - - /* flag TX descs initialised */ - kibnal_data.kib_init = IBNAL_INIT_TXD; - /*****************************************************/ - - frc = iba_create_cq(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(), - &kibnal_data.kib_cq, &kibnal_data.kib_cq, - &n); - if (frc != FSUCCESS) { - CERROR ("Can't create RX CQ: %d\n", frc); - goto failed; - } - - /* flag CQ initialised */ - kibnal_data.kib_init = IBNAL_INIT_CQ; - /*****************************************************/ - - if (n < IBNAL_CQ_ENTRIES()) { - CERROR ("CQ only has %d entries: %d needed\n", - n, IBNAL_CQ_ENTRIES()); - goto failed; - } - - rc = iba_rearm_cq(kibnal_data.kib_cq, CQEventSelNextWC); - if (rc != 0) { - CERROR ("Failed to re-arm completion queue: %d\n", rc); - goto failed; - } - - rc = kibnal_start_listener(); - if (rc != 0) { - CERROR("Can't start listener: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kibnal_data.kib_init = IBNAL_INIT_ALL; - /*****************************************************/ - - return (0); - - failed: - kibnal_shutdown (ni); - return (-ENETDOWN); -} - -void __exit -kibnal_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kibnal_tunables_fini(); -} - -int __init -kibnal_module_init (void) -{ - int rc; - - if (the_lnet.ln_ptlcompat != 0) { - LCONSOLE_ERROR_MSG(0x12c, "IIB does not support portals " - "compatibility mode\n"); - return -ENODEV; - } - - rc = kibnal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Infinicon IB LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kibnal_module_init); -module_exit(kibnal_module_fini); - diff --git a/lnet/klnds/iiblnd/iiblnd.h b/lnet/klnds/iiblnd/iiblnd.h deleted file mode 100644 index 008d59d4afe16508726d1160a6bd0885acacb452..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd.h +++ /dev/null @@ -1,739 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -#include <linux/iba/ibt.h> - -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) - -/* Test for GCC > 3.2.2 */ -#if GCC_VERSION <= 30202 -/* GCC 3.2.2, and presumably several versions before it, will - * miscompile this driver. See - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */ -#error Invalid GCC version. Must use GCC >= 3.2.3 -#endif - -#ifdef CONFIG_SMP -# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define IBNAL_N_SCHED 1 /* # schedulers */ -#endif - -#define IBNAL_USE_FMR 0 /* map on demand v. 
use whole mem mapping */ -#define KIBLND_DETAILED_DEBUG 0 - -/* tunables fixed at compile time */ -#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ -#define IBNAL_CREDIT_HIGHWATER 7 /* when to eagerly return credits */ -#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ -#define IBNAL_RDMA_BASE 0x0eeb0000 -#define IBNAL_STARTING_PSN 1 - -/* QP tunables */ -/* 7 indicates infinite retry attempts, Infinicon recommended 5 */ -#define IBNAL_RETRY 5 /* # times to retry */ -#define IBNAL_RNR_RETRY 5 /* */ -#define IBNAL_CM_RETRY 5 /* # times to retry connection */ -#define IBNAL_FLOW_CONTROL 1 -#define IBNAL_ACK_TIMEOUT 20 /* supposedly 4 secs */ -#define IBNAL_EE_FLOW 1 -#define IBNAL_LOCAL_SUB 1 -#define IBNAL_FAILOVER_ACCEPTED 0 - -/************************/ -/* derived constants... */ - -/* TX messages (shared by all connections) */ -#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx) -#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE) -#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE) - -#if IBNAL_USE_FMR -# define IBNAL_MAX_RDMA_FRAGS 1 -# define IBNAL_CONCURRENT_SENDS IBNAL_RX_MSGS -#else -# define IBNAL_MAX_RDMA_FRAGS LNET_MAX_IOV -# define IBNAL_CONCURRENT_SENDS IBNAL_MSG_QUEUE_SIZE -#endif - -/* RX messages (per connection) */ -#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2) -#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE) -#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -#define IBNAL_CQ_ENTRIES() (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) + \ - (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers)) - -typedef struct -{ - char **kib_hca_basename; /* HCA base name */ - char **kib_ipif_basename; /* IPoIB interface base name */ - char **kib_service_name; /* global service name */ - unsigned int *kib_service_number; 
/* global service number */ - int *kib_min_reconnect_interval; /* min connect retry seconds... */ - int *kib_max_reconnect_interval; /* max connect retry seconds */ - int *kib_concurrent_peers; /* max # peers */ - int *kib_cksum; /* checksum kib_msg_t? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_keepalive; /* keepalive timeout (seconds) */ - int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ - int *kib_sd_retries; /* # concurrent sends to 1 peer */ - int *kib_concurrent_sends; /* send work queue sizing */ -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */ -#endif -} kib_tunables_t; - -/* NB The Infinicon stack has specific typedefs for some things - * (e.g. IB_{L,R}_KEY), that just map back to __u32 etc */ -typedef struct -{ - int ibp_npages; /* # pages */ - struct page *ibp_pages[0]; -} kib_pages_t; - -typedef struct -{ - IB_HANDLE md_handle; - __u32 md_lkey; - __u32 md_rkey; - __u64 md_addr; -} kib_md_t; - -typedef struct -{ - int kib_init; /* initialisation state */ - __u64 kib_incarnation; /* which one am I */ - int kib_shutdown; /* shut down? 
*/ - atomic_t kib_nthreads; /* # live threads */ - lnet_ni_t *kib_ni; /* _the_ iib instance */ - - __u64 kib_port_guid; /* my GUID (lo 64 of GID)*/ - __u16 kib_port_pkey; /* my pkey, whatever that is */ - struct semaphore kib_listener_signal; /* signal completion */ - IB_HANDLE kib_listener_cep; /* connection end point */ - - rwlock_t kib_global_lock; /* stabilize peer/conn ops */ - int kib_ready; /* CQ callback fired */ - int kib_checking_cq; /* a scheduler is checking the CQ */ - - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - atomic_t kib_npeers; /* # peers extant */ - atomic_t kib_nconns; /* # connections extant */ - - struct list_head kib_connd_zombies; /* connections to free */ - struct list_head kib_connd_conns; /* connections to progress */ - struct list_head kib_connd_peers; /* peers waiting for a connection */ - wait_queue_head_t kib_connd_waitq; /* connection daemon sleep here */ - spinlock_t kib_connd_lock; /* serialise */ - - wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ - spinlock_t kib_sched_lock; /* serialise */ - - struct kib_tx *kib_tx_descs; /* all the tx descriptors */ - kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ - - struct list_head kib_idle_txs; /* idle tx descriptors */ - __u64 kib_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kib_tx_lock; /* serialise */ - - IB_HANDLE kib_hca; /* The HCA */ - int kib_port; /* port on the device */ - IB_HANDLE kib_pd; /* protection domain */ - IB_HANDLE kib_sd; /* SD handle */ - IB_HANDLE kib_cq; /* completion queue */ - kib_md_t kib_whole_mem; /* whole-mem registration */ - - int kib_hca_idx; /* my HCA number */ - uint64 kib_hca_guids[8]; /* all the HCA guids */ - IB_CA_ATTRIBUTES kib_hca_attrs; /* where to get HCA attrs */ - - COMMAND_CONTROL_PARAMETERS kib_sdretry; /* control SD query retries */ -} kib_data_t; - -#define IBNAL_INIT_NOTHING 0 -#define IBNAL_INIT_DATA 1 -#define IBNAL_INIT_LIB 
2 -#define IBNAL_INIT_HCA 3 -#define IBNAL_INIT_PORTATTRS 4 -#define IBNAL_INIT_SD 5 -#define IBNAL_INIT_PD 6 -#define IBNAL_INIT_MD 7 -#define IBNAL_INIT_TXD 8 -#define IBNAL_INIT_CQ 9 -#define IBNAL_INIT_ALL 10 - -/************************************************************************ - * Wire message structs. - * These are sent in sender's byte order (i.e. receiver flips). - * CAVEAT EMPTOR: other structs communicated between nodes (e.g. MAD - * private data and SM service info), is LE on the wire. - */ - -typedef struct kib_connparams -{ - __u32 ibcp_queue_depth; - __u32 ibcp_max_msg_size; - __u32 ibcp_max_frags; -} WIRE_ATTR kib_connparams_t; - -typedef struct -{ - lnet_hdr_t ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kib_immediate_msg_t; - -#if IBNAL_USE_FMR -typedef struct -{ - __u64 rd_addr; /* IO VMA address */ - __u32 rd_nob; /* # of bytes */ - __u32 rd_key; /* remote key */ -} WIRE_ATTR kib_rdma_desc_t; -#else -typedef struct -{ - __u32 rf_nob; /* # of bytes */ - __u64 rf_addr; /* remote io vaddr */ -} WIRE_ATTR kib_rdma_frag_t; - -typedef struct -{ - __u32 rd_key; /* local/remote key */ - __u32 rd_nfrag; /* # fragments */ - kib_rdma_frag_t rd_frags[0]; /* buffer frags */ -} WIRE_ATTR kib_rdma_desc_t; -#endif - -typedef struct -{ - lnet_hdr_t ibprm_hdr; /* LNET header */ - __u64 ibprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kib_putreq_msg_t; - -typedef struct -{ - __u64 ibpam_src_cookie; /* reflected completion cookie */ - __u64 ibpam_dst_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */ -} WIRE_ATTR kib_putack_msg_t; - -typedef struct -{ - lnet_hdr_t ibgm_hdr; /* LNET header */ - __u64 ibgm_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibgm_rd; /* sender's sink buffer */ -} WIRE_ATTR kib_get_msg_t; - -typedef struct -{ - __u64 ibcm_cookie; /* opaque completion cookie */ - __u32 ibcm_status; /* completion status */ -} WIRE_ATTR 
kib_completion_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an openibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - __u64 ibm_seq; /* sequence number */ - - union { - kib_connparams_t connparams; - kib_immediate_msg_t immediate; - kib_putreq_msg_t putreq; - kib_putack_msg_t putack; - kib_get_msg_t get; - kib_completion_msg_t completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR kib_msg_t; - -#define IBNAL_MSG_MAGIC LNET_PROTO_IIB_MAGIC /* unique magic */ -#define IBNAL_MSG_VERSION 2 /* current protocol version */ -#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 1 /* previous version */ - -#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */ -#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define IBNAL_MSG_IMMEDIATE 0xd1 /* immediate */ -#define IBNAL_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ -#define IBNAL_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ -#define IBNAL_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ -#define IBNAL_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ -#define IBNAL_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ -#define IBNAL_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ - -/* connection rejection reasons */ -#define IBNAL_REJECT_CONN_RACE 0 /* You lost connection race */ -#define IBNAL_REJECT_NO_RESOURCES 1 /* Out of memory/conns etc */ -#define IBNAL_REJECT_FATAL 2 /* Anything else */ - -/***********************************************************************/ - -typedef struct kib_rx /* receive message */ -{ - struct list_head rx_list; /* queue for 
attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - __u64 rx_hca_msg; /* pre-mapped buffer (hca vaddr) */ - kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ - IB_WORK_REQ2 rx_wrq; - IB_LOCAL_DATASEGMENT rx_gl; /* and its memory */ -} kib_rx_t; - -typedef struct kib_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ - struct kib_conn *tx_conn; /* owning conn */ - int tx_mapped; /* mapped for RDMA? */ - int tx_sending; /* # tx callbacks outstanding */ - int tx_queued; /* queued for sending */ - int tx_waiting; /* waiting for peer */ - int tx_status; /* completion status */ - unsigned long tx_deadline; /* completion deadline */ - __u64 tx_cookie; /* completion cookie */ - lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ - kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ - __u64 tx_hca_msg; /* pre-mapped buffer (HCA vaddr) */ - int tx_nwrq; /* # send work items */ -#if IBNAL_USE_FMR - IB_WORK_REQ2 tx_wrq[2]; /* send work items... */ - IB_LOCAL_DATASEGMENT tx_gl[2]; /* ...and their memory */ - kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */ - kib_md_t tx_md; /* mapping */ - __u64 *tx_pages; /* page phys addrs */ -#else - IB_WORK_REQ2 *tx_wrq; /* send work items... 
*/ - IB_LOCAL_DATASEGMENT *tx_gl; /* ...and their memory */ - kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */ -#endif -} kib_tx_t; - -typedef struct -{ - /* scratchpad during connection establishment */ - IB_QP_ATTRIBUTES_QUERY cv_qpattrs; - QUERY cv_query; - IB_SERVICE_RECORD cv_svcrec; - IB_PATH_RECORD cv_path; - CM_CONN_INFO cv_cmci; -} kib_connvars_t; - -typedef struct kib_conn -{ - struct kib_peer *ibc_peer; /* owning peer */ - struct list_head ibc_list; /* stash on peer's conn list */ - __u64 ibc_incarnation; /* which instance of the peer */ - __u64 ibc_txseq; /* tx sequence number */ - __u64 ibc_rxseq; /* rx sequence number */ - __u32 ibc_version; /* peer protocol version */ - atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # credits for ACK/DONE msgs */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */ - struct list_head ibc_tx_queue_nocred; /* sends that don't need a cred */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */ - struct list_head ibc_tx_queue; /* send queue */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - IB_HANDLE ibc_qp; /* queue pair */ - IB_HANDLE ibc_cep; /* CM endpoint */ - kib_connvars_t *ibc_cvars; /* connection scratchpad */ -} kib_conn_t; - -#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */ -#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */ -#define IBNAL_CONN_CONNECTING 2 /* started to connect */ -#define IBNAL_CONN_ESTABLISHED 3 /* connection established */ -#define IBNAL_CONN_DISCONNECTING 4 /* to send disconnect req */ -#define 
IBNAL_CONN_DISCONNECTED 5 /* no more QP or CM traffic */ - -/* types of connection */ -#define IBNAL_CONN_ACTIVE 0 /* active connect */ -#define IBNAL_CONN_PASSIVE 1 /* passive connect */ -#define IBNAL_CONN_WAITING 2 /* waiting for connect */ - -typedef struct kib_peer -{ - struct list_head ibp_list; /* stash on global peer list */ - struct list_head ibp_connd_list; /* schedule on kib_connd_peers */ - lnet_nid_t ibp_nid; /* who's on the other end(s) */ - atomic_t ibp_refcount; /* # users */ - int ibp_persistence; /* "known" peer refs */ - int ibp_version; /* protocol version */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - int ibp_connecting; /* active connects in progress */ - int ibp_accepting; /* passive connects in progress */ - int ibp_passivewait; /* waiting for peer to connect */ - unsigned long ibp_passivewait_deadline; /* when passive wait must complete */ - unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ - unsigned long ibp_reconnect_interval; /* exponential backoff */ - int ibp_error; /* errno on closing this peer */ - cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */ -} kib_peer_t; - - -extern kib_data_t kibnal_data; -extern kib_tunables_t kibnal_tunables; - -/******************************************************************************/ - -/* these are purposely avoiding using local vars so they don't increase - * stack consumption. 
*/ - -#define kibnal_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kibnal_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kibnal_data.kib_connd_zombies); \ - wake_up(&kibnal_data.kib_connd_waitq); \ - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); \ - } \ -} while (0) - -#define kibnal_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kibnal_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kibnal_destroy_peer(peer); \ -} while (0) - -/******************************************************************************/ - -static inline struct list_head * -kibnal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - - return (&kibnal_data.kib_peers [hash]); -} - -static inline int -kibnal_peer_active(kib_peer_t *peer) -{ - /* Am I in the peer hash table? */ - return (!list_empty(&peer->ibp_list)); -} - -static inline int -kibnal_peer_connecting(kib_peer_t *peer) -{ - /* Am I expecting a connection to materialise? 
*/ - return (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - peer->ibp_passivewait); -} - -static inline void -kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nwrq > 0); /* work items set up */ - LASSERT (!tx->tx_queued); /* not queued for sending already */ - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ); - - if (tx->tx_conn == NULL) { - kibnal_conn_addref(conn); - tx->tx_conn = conn; - LASSERT (tx->tx_msg->ibm_type != IBNAL_MSG_PUT_DONE); - } else { - LASSERT (tx->tx_conn == conn); - LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE); - } - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* All messages have simple credit control */ - q = &conn->ibc_tx_queue; - } else { - LASSERT (conn->ibc_version == IBNAL_MSG_VERSION); - - switch (tx->tx_msg->ibm_type) { - case IBNAL_MSG_PUT_REQ: - case IBNAL_MSG_GET_REQ: - /* RDMA request: reserve a buffer for the RDMA reply - * before sending */ - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_ACK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - /* RDMA reply/completion: no credits; peer has reserved - * a reply buffer */ - q = &conn->ibc_tx_queue_nocred; - break; - - case IBNAL_MSG_NOOP: - case IBNAL_MSG_IMMEDIATE: - /* Otherwise: consume a credit before sending */ - q = &conn->ibc_tx_queue; - break; - - default: - LBUG(); - q = NULL; - } - } - - list_add_tail(&tx->tx_list, q); -} - -static inline int -kibnal_send_keepalive(kib_conn_t *conn) -{ - return (*kibnal_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - *kibnal_tunables.kib_keepalive*HZ); -} - -#define KIBNAL_SERVICE_KEY_MASK (IB_SERVICE_RECORD_COMP_SERVICENAME | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_1 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_2 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_3 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_4 | \ - 
IB_SERVICE_RECORD_COMP_SERVICEDATA8_5 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_6 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_7 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_8) - -static inline __u64* -kibnal_service_nid_field(IB_SERVICE_RECORD *srv) -{ - /* must be consistent with KIBNAL_SERVICE_KEY_MASK */ - return (__u64 *)srv->ServiceData8; -} - -static inline void -kibnal_set_service_keys(IB_SERVICE_RECORD *srv, lnet_nid_t nid) -{ - char *svc_name = *kibnal_tunables.kib_service_name; - - LASSERT (strlen(svc_name) < sizeof(srv->ServiceName)); - memset (srv->ServiceName, 0, sizeof(srv->ServiceName)); - strcpy (srv->ServiceName, svc_name); - - *kibnal_service_nid_field(srv) = cpu_to_le64(nid); -} - -/* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the - * lowest 2 bits of the work request id to stash the work item type (the op - * field is not valid when the wc completes in error). */ - -#define IBNAL_WID_TX 0 -#define IBNAL_WID_RX 1 -#define IBNAL_WID_RDMA 2 -#define IBNAL_WID_MASK 3UL - -static inline __u64 -kibnal_ptr2wreqid (void *ptr, int type) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT ((lptr & IBNAL_WID_MASK) == 0); - LASSERT ((type & ~IBNAL_WID_MASK) == 0); - return (__u64)(lptr | type); -} - -static inline void * -kibnal_wreqid2ptr (__u64 wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~IBNAL_WID_MASK); -} - -static inline int -kibnal_wreqid2type (__u64 wreqid) -{ - return (wreqid & IBNAL_WID_MASK); -} - -static inline void -kibnal_set_conn_state (kib_conn_t *conn, int state) -{ - CDEBUG(D_NET,"%p state %d\n", conn, state); - conn->ibc_state = state; - mb(); -} - -#if IBNAL_USE_FMR - -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - return rd->rd_nob; -} - -#else -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - int i; - int size; - - for (i = size = 0; i < rd->rd_nfrag; i++) - size += rd->rd_frags[i].rf_nob; - - return size; -} -#endif - -int kibnal_startup (lnet_ni_t *ni); -void 
kibnal_shutdown (lnet_ni_t *ni); -int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kibnal_eager_recv (lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); -void kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq); -void kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob, int type, - lnet_nid_t dstnid, __u64 dststamp); -int kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob); -IB_HANDLE kibnal_create_cep(lnet_nid_t nid); -int kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid); -void kibnal_destroy_peer (kib_peer_t *peer); -kib_peer_t *kibnal_find_peer_locked (lnet_nid_t nid); -int kibnal_del_peer (lnet_nid_t nid); -void kibnal_peer_alive (kib_peer_t *peer); -void kibnal_unlink_peer_locked (kib_peer_t *peer); -int kibnal_add_persistent_peer (lnet_nid_t nid); -int kibnal_close_stale_conns_locked (kib_peer_t *peer, - __u64 incarnation); -int kibnal_conn_rts(kib_conn_t *conn, - __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn); -kib_conn_t *kibnal_create_conn (lnet_nid_t nid, int proto_version); -void kibnal_destroy_conn (kib_conn_t *conn); -void kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg); -int kibnal_alloc_pages (kib_pages_t **pp, int npages); -void kibnal_free_pages (kib_pages_t *p); -void kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn); -void kibnal_txlist_done (struct list_head *txlist, int status); -int kibnal_post_receives (kib_conn_t *conn); -int kibnal_init_rdma (kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie); -void kibnal_check_sends (kib_conn_t *conn); 
-void kibnal_close_conn_locked (kib_conn_t *conn, int error); -int kibnal_thread_start (int (*fn)(void *arg), void *arg); -int kibnal_scheduler(void *arg); -int kibnal_connd (void *arg); -void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); -void kibnal_close_conn (kib_conn_t *conn, int why); -void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lnet_msg_t *lntmsg, - unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob); -void kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev); -void kibnal_hca_callback (void *hca_arg, void *cq_arg); -int kibnal_tunables_init (void); -void kibnal_tunables_fini (void); diff --git a/lnet/klnds/iiblnd/iiblnd_cb.c b/lnet/klnds/iiblnd/iiblnd_cb.c deleted file mode 100644 index 22d2bfd307ffac4bba582c14108cdc09655c979e..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd_cb.c +++ /dev/null @@ -1,3399 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "iiblnd.h" - -void -hexdump(char *string, void *ptr, int len) -{ - unsigned char *c = ptr; - int i; - - return; - - if (len < 0 || len > 2048) { - printk("XXX what the hell? %d\n",len); - return; - } - - printk("%d bytes of '%s' from 0x%p\n", len, string, ptr); - - for (i = 0; i < len;) { - printk("%02x",*(c++)); - i++; - if (!(i & 15)) { - printk("\n"); - } else if (!(i&1)) { - printk(" "); - } - } - - if(len & 15) { - printk("\n"); - } -} - -void -kibnal_tx_done (kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - int rc = tx->tx_status; - int i; - - LASSERT (!in_interrupt()); - LASSERT (!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */ - LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */ - -#if IBNAL_USE_FMR - /* Handle unmapping if required */ -#endif - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - - if (tx->tx_conn != NULL) { - kibnal_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - spin_lock(&kibnal_data.kib_tx_lock); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - spin_unlock(&kibnal_data.kib_tx_lock); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) - continue; - - lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc); - } -} - -kib_tx_t * -kibnal_get_idle_tx (void) -{ - kib_tx_t *tx; - - spin_lock(&kibnal_data.kib_tx_lock); - - if (list_empty (&kibnal_data.kib_idle_txs)) { - spin_unlock(&kibnal_data.kib_tx_lock); - return NULL; - } - - tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... 
*/ - tx->tx_cookie = kibnal_data.kib_next_tx_cookie++; - - spin_unlock(&kibnal_data.kib_tx_lock); - - LASSERT (tx->tx_nwrq == 0); - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending == 0); - LASSERT (!tx->tx_waiting); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -int -kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) -{ - kib_conn_t *conn = rx->rx_conn; - int rc = 0; - FSTATUS frc; - - LASSERT (!in_interrupt()); - /* old peers don't reserve rxs for RDMA replies */ - LASSERT (!rsrvd_credit || - conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - - rx->rx_gl = (IB_LOCAL_DATASEGMENT) { - .Address = rx->rx_hca_msg, - .Lkey = kibnal_data.kib_whole_mem.md_lkey, - .Length = IBNAL_MSG_SIZE, - }; - - rx->rx_wrq = (IB_WORK_REQ2) { - .Next = NULL, - .WorkReqId = kibnal_ptr2wreqid(rx, IBNAL_WID_RX), - .MessageLen = IBNAL_MSG_SIZE, - .DSList = &rx->rx_gl, - .DSListDepth = 1, - .Operation = WROpRecv, - }; - - LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING); - LASSERT (rx->rx_nob >= 0); /* not posted */ - - CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n", - rx->rx_wrq.DSList->Length, - rx->rx_wrq.DSList->Lkey, - rx->rx_wrq.DSList->Address); - - if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) { - /* No more posts for this rx; so lose its ref */ - kibnal_conn_decref(conn); - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - mb(); - - frc = iba_post_recv2(conn->ibc_qp, &rx->rx_wrq, NULL); - if (frc == FSUCCESS) { - if (credit || rsrvd_credit) { - spin_lock(&conn->ibc_lock); - - if (credit) - conn->ibc_outstanding_credits++; - if (rsrvd_credit) - conn->ibc_reserved_credits++; - - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); - } - return 0; - } - - CERROR ("post rx -> %s failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - rc = -EIO; - kibnal_close_conn(rx->rx_conn, rc); - /* No more posts for this rx; so lose its ref */ 
- kibnal_conn_decref(conn); - return rc; -} - -int -kibnal_post_receives (kib_conn_t *conn) -{ - int i; - int rc; - - LASSERT (conn->ibc_state == IBNAL_CONN_CONNECTING); - - for (i = 0; i < IBNAL_RX_MSGS; i++) { - /* +1 ref for rx desc. This ref remains until kibnal_post_rx - * fails (i.e. actual failure or we're disconnecting) */ - kibnal_conn_addref(conn); - rc = kibnal_post_rx (&conn->ibc_rxs[i], 0, 0); - if (rc != 0) - return rc; - } - - return 0; -} - -kib_tx_t * -kibnal_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending != 0 || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted %x)\n", - tx->tx_waiting ? "" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -void -kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) -{ - kib_tx_t *tx; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kibnal_find_waiting_tx_locked(conn, txtype, cookie); - if (tx == NULL) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie "LPX64" from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_close_conn (conn, -EPROTO); - return; - } - - if (tx->tx_status == 0) { /* success so far */ - if (status < 0) { /* failed? 
*/ - tx->tx_status = status; - } else if (txtype == IBNAL_MSG_GET_REQ) { - lnet_set_reply_msg_len(kibnal_data.kib_ni, - tx->tx_lntmsg[1], status); - } - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && (tx->tx_sending == 0); - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done(tx); -} - -void -kibnal_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie) -{ - kib_tx_t *tx = kibnal_get_idle_tx(); - - if (tx == NULL) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kibnal_init_tx_msg(tx, type, sizeof(kib_completion_msg_t)); - - kibnal_queue_tx(tx, conn); -} - -void -kibnal_handle_rx (kib_rx_t *rx) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - int credits = msg->ibm_credits; - kib_tx_t *tx; - int rc = 0; - int repost = 1; - int rsrvd_credit = 0; - int rc2; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - CDEBUG (D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - if (credits != 0) { - /* Have I received credits that will let me send? 
*/ - spin_lock(&conn->ibc_lock); - conn->ibc_credits += credits; - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBNAL message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_NAK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_handle_completion(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_PUT_ACK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - spin_lock(&conn->ibc_lock); - tx = kibnal_find_waiting_tx_locked(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx != NULL) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (tx == NULL) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT (tx->tx_waiting); - /* CAVEAT EMPTOR: I could be racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. 
*/ - - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kibnal_init_rdma(tx, IBNAL_MSG_PUT_DONE, - kibnal_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - if (tx->tx_status == 0 && rc2 < 0) - tx->tx_status = rc2; - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kibnal_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBNAL_MSG_PUT_DONE: - /* This buffer was pre-reserved by not returning the credit - * when the PUT_REQ's buffer was reposted, so I just return it - * now */ - kibnal_handle_completion(conn, IBNAL_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_GET_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_GET_DONE: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - kibnal_handle_completion(conn, IBNAL_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kibnal_close_conn(conn, rc); - - if (repost) { - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - rsrvd_credit = 0; /* peer isn't pre-reserving */ - - kibnal_post_rx(rx, !rsrvd_credit, rsrvd_credit); - } -} - -void -kibnal_rx_complete (IB_WORK_COMPLETION *wc, __u64 rxseq) -{ - kib_rx_t *rx = (kib_rx_t *)kibnal_wreqid2ptr(wc->WorkReqId); - int nob = wc->Length; - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - unsigned long flags; - int rc; - int err = -EIO; - - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - mb(); - - /* receives complete with error in any case after we've started - * disconnecting */ - if (conn->ibc_state > 
IBNAL_CONN_ESTABLISHED) - goto ignore; - - if (wc->Status != WRStatusSuccess) { - CERROR("Rx from %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), wc->Status); - goto failed; - } - - rc = kibnal_unpack_msg(msg, conn->ibc_version, nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - rx->rx_nob = nob; /* Now I know nob > 0 */ - mb(); - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - if (msg->ibm_seq != rxseq) { - CERROR ("Out-of-sequence rx from %s" - ": got "LPD64" but expected "LPD64"\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - msg->ibm_seq, rxseq); - goto failed; - } - - /* set time last known alive */ - kibnal_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! 
*/ - - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return; - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - } - kibnal_handle_rx(rx); - return; - - failed: - kibnal_close_conn(conn, err); - ignore: - /* Don't re-post rx & drop its ref on conn */ - kibnal_conn_decref(conn); -} - -struct page * -kibnal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) { - page = vmalloc_to_page ((void *)vaddr); - LASSERT (page != NULL); - return page; - } -#ifdef CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk (kiov) I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page (vaddr); - LASSERT (page != NULL); - return page; -} - -#if !IBNAL_USE_FMR -int -kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page, - unsigned long page_offset, unsigned long len) -{ - kib_rdma_frag_t *frag = &rd->rd_frags[rd->rd_nfrag]; - - if (rd->rd_nfrag >= IBNAL_MAX_RDMA_FRAGS) { - CERROR ("Too many RDMA fragments\n"); - return -EMSGSIZE; - } - - if (active) { - if (rd->rd_nfrag == 0) - rd->rd_key = kibnal_data.kib_whole_mem.md_lkey; - } else { - if (rd->rd_nfrag == 0) - rd->rd_key = kibnal_data.kib_whole_mem.md_rkey; - } - - frag->rf_nob = len; - frag->rf_addr = kibnal_data.kib_whole_mem.md_addr + - lnet_page2phys(page) + page_offset; - - CDEBUG(D_NET,"map key %x frag [%d]["LPX64" for %d]\n", - rd->rd_key, rd->rd_nfrag, frag->rf_addr, frag->rf_nob); - - rd->rd_nfrag++; - return 0; -} - -int -kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - unsigned int niov, struct 
iovec *iov, int offset, int nob) - -{ - int fragnob; - int rc; - unsigned long vaddr; - struct page *page; - int page_offset; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR ("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - rc = kibnal_append_rdfrag(rd, active, page, - page_offset, fragnob); - if (rc != 0) - return rc; - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - return 0; -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - int fragnob; - int rc; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (nkiov > 0); - fragnob = min((int)(kiov->kiov_len - offset), nob); - - rc = kibnal_append_rdfrag(rd, active, kiov->kiov_page, - kiov->kiov_offset + offset, - fragnob); - if (rc != 0) - return rc; - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - return 0; -} -#else -int -kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int npages, unsigned long page_offset, int nob) -{ - IB_ACCESS_CONTROL access = {0,}; - FSTATUS frc; - - LASSERT ((rd != tx->tx_rd) == !active); - LASSERT (!tx->tx_md.md_active); - LASSERT 
(tx->tx_md.md_fmrcount > 0); - LASSERT (page_offset < PAGE_SIZE); - LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT))); - LASSERT (npages <= LNET_MAX_IOV); - - if (!active) { - // access.s.MWBindable = 1; - access.s.LocalWrite = 1; - access.s.RdmaWrite = 1; - } - - /* Map the memory described by tx->tx_pages - frc = iibt_register_physical_memory(kibnal_data.kib_hca, - IBNAL_RDMA_BASE, - tx->tx_pages, npages, - page_offset, - kibnal_data.kib_pd, - access, - &tx->tx_md.md_handle, - &tx->tx_md.md_addr, - &tx->tx_md.md_lkey, - &tx->tx_md.md_rkey); - */ - return -EINVAL; -} - -int -kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - int resid; - int fragnob; - struct page *page; - int npages; - unsigned long page_offset; - unsigned long vaddr; - - LASSERT (nob > 0); - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = ((unsigned long)iov->iov_base) + offset; - - page_offset = vaddr & (PAGE_SIZE - 1); - resid = nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR("Can't find page for %lu\n", vaddr); - return -EFAULT; - } - - tx->tx_pages[npages++] = lnet_page2phys(page); - - fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1)); - vaddr += fragnob; - resid -= fragnob; - - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - int resid; - int npages; - unsigned long page_offset; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (nkiov <= LNET_MAX_IOV); - 
LASSERT (!tx->tx_md.md_active); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - page_offset = kiov->kiov_offset + offset; - - resid = offset + nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - LASSERT (nkiov > 0); - - if ((npages > 0 && kiov->kiov_offset != 0) || - (resid > kiov->kiov_len && - (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) { - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, len %d \n", - npages, kiov->kiov_offset, kiov->kiov_len); - - return -EINVAL; - } - - tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page); - resid -= kiov->kiov_len; - kiov++; - nkiov--; - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} -#endif - -kib_conn_t * -kibnal_find_conn_locked (kib_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->ibp_conns) { - return (list_entry(tmp, kib_conn_t, ibc_list)); - } - - return (NULL); -} - -void -kibnal_check_sends (kib_conn_t *conn) -{ - kib_tx_t *tx; - FSTATUS frc; - int rc; - int consume_cred; - int done; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - spin_lock(&conn->ibc_lock); - - LASSERT (conn->ibc_nsends_posted <= - *kibnal_tunables.kib_concurrent_sends); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_nocred) && - (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER || - kibnal_send_keepalive(conn))) { - 
spin_unlock(&conn->ibc_lock); - - tx = kibnal_get_idle_tx(); - if (tx != NULL) - kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - - if (tx != NULL) - kibnal_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry (conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_cred = 0; - } else if (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_cred = 1; - } else { - /* nothing waiting */ - break; - } - - LASSERT (tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); - - if (conn->ibc_nsends_posted == - *kibnal_tunables.kib_concurrent_sends) { - /* We've got some tx completions outstanding... 
*/ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (consume_cred) { - if (conn->ibc_credits == 0) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) { /* giving back credits */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - } - - list_del (&tx->tx_list); - tx->tx_queued = 0; - - /* NB don't drop ibc_lock before bumping tx_sending */ - - if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP && - (!list_empty(&conn->ibc_tx_queue) || - !list_empty(&conn->ibc_tx_queue_nocred) || - (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER && - !kibnal_send_keepalive(conn)))) { - /* redundant NOOP */ - spin_unlock(&conn->ibc_lock); - kibnal_tx_done(tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - continue; - } - - kibnal_pack_msg(tx->tx_msg, conn->ibc_version, - conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation, - conn->ibc_txseq); - - conn->ibc_txseq++; - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_cred) - conn->ibc_credits--; - - /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() from - * the first send; hence the ++ rather than = below. 
*/ - tx->tx_sending++; - - list_add (&tx->tx_list, &conn->ibc_active_txs); - - LASSERT (tx->tx_nwrq > 0); - - rc = 0; - frc = FSUCCESS; - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) { - rc = -ECONNABORTED; - } else { - frc = iba_post_send2(conn->ibc_qp, tx->tx_wrq, NULL); - if (frc != FSUCCESS) - rc = -EIO; - } - - conn->ibc_last_send = jiffies; - - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_cred) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CERROR ("Error %d posting transmit to %s\n", - frc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG (D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_close_conn (conn, rc); - - if (done) - kibnal_tx_done (tx); - return; - } - } - - spin_unlock(&conn->ibc_lock); -} - -void -kibnal_tx_complete (IB_WORK_COMPLETION *wc) -{ - kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(wc->WorkReqId); - kib_conn_t *conn = tx->tx_conn; - int failed = wc->Status != WRStatusSuccess; - int idle; - - CDEBUG(D_NET, "%s: sending %d nwrq %d status %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_sending, tx->tx_nwrq, wc->Status); - - LASSERT (tx->tx_sending > 0); - - if (failed && - tx->tx_status == 0 && - conn->ibc_state == IBNAL_CONN_ESTABLISHED) { -#if KIBLND_DETAILED_DEBUG - int i; - IB_WORK_REQ2 *wrq = &tx->tx_wrq[0]; - IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[0]; - lnet_msg_t *lntmsg = tx->tx_lntmsg[0]; -#endif - CDEBUG(D_NETERROR, "tx -> %s type %x cookie "LPX64 - " sending %d waiting %d failed %d nwrk %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_msg->ibm_type, tx->tx_cookie, - 
tx->tx_sending, tx->tx_waiting, wc->Status, - tx->tx_nwrq); -#if KIBLND_DETAILED_DEBUG - for (i = 0; i < tx->tx_nwrq; i++, wrq++, gl++) { - switch (wrq->Operation) { - default: - CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p OP %d " - "DSList %p(%p)/%d: "LPX64"/%d K %x\n", - i, wrq, wrq->Next, wrq->Operation, - wrq->DSList, gl, wrq->DSListDepth, - gl->Address, gl->Length, gl->Lkey); - break; - case WROpSend: - CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p SEND " - "DSList %p(%p)/%d: "LPX64"/%d K %x\n", - i, wrq, wrq->Next, - wrq->DSList, gl, wrq->DSListDepth, - gl->Address, gl->Length, gl->Lkey); - break; - case WROpRdmaWrite: - CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p DMA " - "DSList: %p(%p)/%d "LPX64"/%d K %x -> " - LPX64" K %x\n", - i, wrq, wrq->Next, - wrq->DSList, gl, wrq->DSListDepth, - gl->Address, gl->Length, gl->Lkey, - wrq->Req.SendRC.RemoteDS.Address, - wrq->Req.SendRC.RemoteDS.Rkey); - break; - } - } - - switch (tx->tx_msg->ibm_type) { - default: - CDEBUG(D_NETERROR, " msg type %x %p/%d, No RDMA\n", - tx->tx_msg->ibm_type, - tx->tx_msg, tx->tx_msg->ibm_nob); - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - CDEBUG(D_NETERROR, " msg type %x %p/%d, RDMA key %x frags %d...\n", - tx->tx_msg->ibm_type, - tx->tx_msg, tx->tx_msg->ibm_nob, - tx->tx_rd->rd_key, tx->tx_rd->rd_nfrag); - for (i = 0; i < tx->tx_rd->rd_nfrag; i++) - CDEBUG(D_NETERROR, " [%d] "LPX64"/%d\n", i, - tx->tx_rd->rd_frags[i].rf_addr, - tx->tx_rd->rd_frags[i].rf_nob); - if (lntmsg == NULL) { - CDEBUG(D_NETERROR, " No lntmsg\n"); - } else if (lntmsg->msg_iov != NULL) { - CDEBUG(D_NETERROR, " lntmsg in %d VIRT frags...\n", - lntmsg->msg_niov); - for (i = 0; i < lntmsg->msg_niov; i++) - CDEBUG(D_NETERROR, " [%d] %p/%d\n", i, - lntmsg->msg_iov[i].iov_base, - lntmsg->msg_iov[i].iov_len); - } else if (lntmsg->msg_kiov != NULL) { - CDEBUG(D_NETERROR, " lntmsg in %d PAGE frags...\n", - lntmsg->msg_niov); - for (i = 0; i < lntmsg->msg_niov; i++) - CDEBUG(D_NETERROR, " [%d] %p+%d/%d\n", i, 
- lntmsg->msg_kiov[i].kiov_page, - lntmsg->msg_kiov[i].kiov_offset, - lntmsg->msg_kiov[i].kiov_len); - } else { - CDEBUG(D_NETERROR, " lntmsg in %d frags\n", - lntmsg->msg_niov); - } - - break; - } -#endif - } - - spin_lock(&conn->ibc_lock); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. */ - - tx->tx_sending--; - conn->ibc_nsends_posted--; - - if (failed) { - tx->tx_waiting = 0; - tx->tx_status = -EIO; - } - - idle = (tx->tx_sending == 0) && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kibnal_conn_addref(conn); /* 1 ref for me.... */ - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done (tx); - - if (failed) { - kibnal_close_conn (conn, -EIO); - } else { - kibnal_peer_alive(conn->ibc_peer); - kibnal_check_sends(conn); - } - - kibnal_conn_decref(conn); /* ...until here */ -} - -void -kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) -{ - IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[tx->tx_nwrq]; - IB_WORK_REQ2 *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - - LASSERT (tx->tx_nwrq >= 0 && - tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS)); - LASSERT (nob <= IBNAL_MSG_SIZE); - - kibnal_init_msg(tx->tx_msg, type, body_nob); - - *gl = (IB_LOCAL_DATASEGMENT) { - .Address = tx->tx_hca_msg, - .Length = IBNAL_MSG_SIZE, - .Lkey = kibnal_data.kib_whole_mem.md_lkey, - }; - - wrq->Next = NULL; /* This is the last one */ - - wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_TX); - wrq->Operation = WROpSend; - wrq->DSList = gl; - wrq->DSListDepth = 1; - wrq->MessageLen = nob; - wrq->Req.SendRC.ImmediateData = 0; - wrq->Req.SendRC.Options.s.SolicitedEvent = 1; - wrq->Req.SendRC.Options.s.SignaledCompletion = 1; - wrq->Req.SendRC.Options.s.ImmediateData = 0; - wrq->Req.SendRC.Options.s.Fence = 0; - /* fence only needed on RDMA reads 
*/ - - tx->tx_nwrq++; -} - -int -kibnal_init_rdma (kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie) -{ - kib_msg_t *ibmsg = tx->tx_msg; - kib_rdma_desc_t *srcrd = tx->tx_rd; - IB_LOCAL_DATASEGMENT *gl; - IB_WORK_REQ2 *wrq; - int rc; - -#if IBNAL_USE_FMR - LASSERT (tx->tx_nwrq == 0); - - gl = &tx->tx_gl[0]; - gl->Length = nob; - gl->Address = srcrd->rd_addr; - gl->Lkey = srcrd->rd_key; - - wrq = &tx->tx_wrq[0]; - - wrq->Next = wrq + 1; - wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->Operation = WROpRdmaWrite; - wrq->DSList = gl; - wrq->DSListDepth = 1; - wrq->MessageLen = nob; - - wrq->Req.SendRC.ImmediateData = 0; - wrq->Req.SendRC.Options.s.SolicitedEvent = 0; - wrq->Req.SendRC.Options.s.SignaledCompletion = 0; - wrq->Req.SendRC.Options.s.ImmediateData = 0; - wrq->Req.SendRC.Options.s.Fence = 0; - - wrq->Req.SendRC.RemoteDS.Address = dstrd->rd_addr; - wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key; - - tx->tx_nwrq = 1; - rc = nob; -#else - /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */ - int resid = nob; - kib_rdma_frag_t *srcfrag; - int srcidx; - kib_rdma_frag_t *dstfrag; - int dstidx; - int wrknob; - - /* Called by scheduler */ - LASSERT (!in_interrupt()); - - LASSERT (type == IBNAL_MSG_GET_DONE || - type == IBNAL_MSG_PUT_DONE); - - srcidx = dstidx = 0; - srcfrag = &srcrd->rd_frags[0]; - dstfrag = &dstrd->rd_frags[0]; - rc = resid; - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrag) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrag) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq == IBNAL_MAX_RDMA_FRAGS) { - CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n", - srcidx, srcrd->rd_nfrag, - dstidx, dstrd->rd_nfrag); - rc = -EMSGSIZE; - break; - } - - wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid); - - gl = &tx->tx_gl[tx->tx_nwrq]; - gl->Length = wrknob; - 
gl->Address = srcfrag->rf_addr; - gl->Lkey = srcrd->rd_key; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - - wrq->Next = wrq + 1; - wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->Operation = WROpRdmaWrite; - wrq->DSList = gl; - wrq->DSListDepth = 1; - wrq->MessageLen = nob; - - wrq->Req.SendRC.ImmediateData = 0; - wrq->Req.SendRC.Options.s.SolicitedEvent = 0; - wrq->Req.SendRC.Options.s.SignaledCompletion = 0; - wrq->Req.SendRC.Options.s.ImmediateData = 0; - wrq->Req.SendRC.Options.s.Fence = 0; - - wrq->Req.SendRC.RemoteDS.Address = dstfrag->rf_addr; - wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key; - - resid -= wrknob; - if (wrknob < srcfrag->rf_nob) { - srcfrag->rf_addr += wrknob; - srcfrag->rf_nob -= wrknob; - } else { - srcfrag++; - srcidx++; - } - - if (wrknob < dstfrag->rf_nob) { - dstfrag->rf_addr += wrknob; - dstfrag->rf_nob -= wrknob; - } else { - dstfrag++; - dstidx++; - } - - tx->tx_nwrq++; - } - - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; -#endif - - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t)); - - return rc; -} - -void -kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - spin_lock(&conn->ibc_lock); - kibnal_queue_tx_locked (tx, conn); - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); -} - -void -kibnal_schedule_active_connect_locked (kib_peer_t *peer, int proto_version) -{ - /* Called holding kib_global_lock exclusive with IRQs disabled */ - - peer->ibp_version = proto_version; /* proto version for new conn */ - peer->ibp_connecting++; /* I'm connecting */ - kibnal_peer_addref(peer); /* extra ref for connd */ - - spin_lock(&kibnal_data.kib_connd_lock); - - list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock(&kibnal_data.kib_connd_lock); -} - -void -kibnal_schedule_active_connect (kib_peer_t *peer, int proto_version) -{ - 
unsigned long flags; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - kibnal_schedule_active_connect_locked(peer, proto_version); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_conn_t *conn; - unsigned long flags; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - int retry; - int rc; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nwrq > 0); /* work items have been set up */ - - for (retry = 0; ; retry = 1) { - read_lock_irqsave(g_lock, flags); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) { - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - kibnal_conn_addref(conn); /* 1 ref for me... */ - read_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...to here */ - return; - } - } - - /* Making one or more connections; I'll need a write lock... */ - read_unlock(g_lock); - write_lock(g_lock); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) - break; - - write_unlock_irqrestore(g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - rc = kibnal_add_persistent_peer(nid); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - } - - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kibnal_conn_addref(conn); /* 1 ref for me... 
*/ - write_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - - if (!kibnal_peer_connecting(peer)) { - if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->ibp_reconnect_time))) { - write_unlock_irqrestore(g_lock, flags); - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - kibnal_schedule_active_connect_locked(peer, IBNAL_MSG_VERSION); - } - - /* A connection is being established; queue the message... */ - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - - write_unlock_irqrestore(g_lock, flags); -} - -void -kibnal_txlist_done (struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kibnal_tx_done (tx); - } -} - -int -kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... 
*/ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can allocate txd for GET to %s: \n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd, - 0, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd, - 0, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - -#if IBNAL_USE_FMR - nob = sizeof(kib_get_msg_t); -#else - { - int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag; - - nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni, - lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kibnal_tx_done(tx); - return -EIO; - 
} - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kibnal_launch_tx(tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? "PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (payload_kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc != 0) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kibnal_launch_tx(tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBNAL_MSG_SIZE); - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - 
payload_niov, payload_iov, - payload_offset, payload_nob); - - nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]); - kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kibnal_launch_tx(tx, target.nid); - return 0; -} - -void -kibnal_reply(lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) -{ - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kib_tx_t *tx; - int rc; - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (nob == 0) - rc = 0; - else if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1, - niov, iov, offset, nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1, - niov, kiov, offset, nob); - - if (rc != 0) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (rc == 0) { - /* No RDMA: local completion may happen now! 
*/ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it - * completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kibnal_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kibnal_tx_done(tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void **new_private) -{ - kib_rx_t *rx = private; - kib_conn_t *conn = rx->rx_conn; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* Can't block if RDMA completions need normal credits */ - LCONSOLE_ERROR_MSG(0x12d, "Dropping message from %s: no " - "buffers free. %s is running an old version" - " of LNET that may deadlock if messages " - "wait for buffers)\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return -EDEADLK; - } - - *new_private = private; - return 0; -} - -int -kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - kib_tx_t *tx; - kib_msg_t *txmsg; - int nob; - int post_cred = 1; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBNAL_MSG_IMMEDIATE: - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, 
ibm_u.immediate.ibim_payload), - mlen); - lnet_finalize (ni, lntmsg, 0); - break; - - case IBNAL_MSG_PUT_REQ: - if (mlen == 0) { - lnet_finalize(ni, lntmsg, 0); - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - 0, - niov, iov, offset, mlen); - else - rc = kibnal_setup_rd_kiov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - 0, - niov, kiov, offset, mlen); - if (rc != 0) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_tx_done(tx); - /* tell peer it's over */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; -#if IBNAL_USE_FMR - nob = sizeof(kib_putack_msg_t); -#else - { - int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag; - - nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kibnal_queue_tx(tx, conn); - - if (conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - post_cred = 0; /* peer still owns 'rx' for sending PUT_DONE */ - break; - - case IBNAL_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Optimized GET; RDMA lntmsg's payload */ - kibnal_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kibnal_post_rx(rx, post_cred, 0); - return 
rc; -} - -int -kibnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kibnal_data.kib_nthreads); - return (0); -} - -void -kibnal_thread_fini (void) -{ - atomic_dec (&kibnal_data.kib_nthreads); -} - -void -kibnal_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kibnal_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive); -} - -void -kibnal_schedule_conn (kib_conn_t *conn) -{ - unsigned long flags; - - kibnal_conn_addref(conn); /* ++ref for connd */ - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); -} - -void -kibnal_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immediate housekeeping to start shutdown of an - * established connection. 'error' is zero for a normal shutdown. 
- * Caller holds kib_global_lock exclusively in irq context */ - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) - return; /* already being handled */ - - /* NB Can't take ibc_lock here (could be in IRQ context), without - * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */ - - if (error == 0 && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_txseq, conn->ibc_rxseq); - } else { - CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? 
"" : "(waiting)", - conn->ibc_txseq, conn->ibc_rxseq); -#if 0 - /* can't skip down the queue without holding ibc_lock (see above) */ - list_for_each(tmp, &conn->ibc_tx_queue) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - CERROR(" queued tx type %x cookie "LPX64 - " sending %d waiting %d ticks %ld/%d\n", - tx->tx_msg->ibm_type, tx->tx_cookie, - tx->tx_sending, tx->tx_waiting, - (long)(tx->tx_deadline - jiffies), HZ); - } - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - CERROR(" active tx type %x cookie "LPX64 - " sending %d waiting %d ticks %ld/%d\n", - tx->tx_msg->ibm_type, tx->tx_cookie, - tx->tx_sending, tx->tx_waiting, - (long)(tx->tx_deadline - jiffies), HZ); - } -#endif - } - - list_del (&conn->ibc_list); - - if (list_empty (&peer->ibp_conns)) { /* no more conns */ - if (peer->ibp_persistence == 0 && /* non-persistent peer */ - kibnal_peer_active(peer)) /* still in peer table */ - kibnal_unlink_peer_locked (peer); - - peer->ibp_error = error; /* set/clear error on last conn */ - } - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTING); - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); /* lose ibc_list's ref */ -} - -void -kibnal_close_conn (kib_conn_t *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - kibnal_close_conn_locked (conn, error); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_handle_early_rxs(kib_conn_t *conn) -{ - unsigned long flags; - kib_rx_t *rx; - - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - while (!list_empty(&conn->ibc_early_rxs)) { - rx = list_entry(conn->ibc_early_rxs.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_handle_rx(rx); - - write_lock_irqsave(&kibnal_data.kib_global_lock, 
flags); - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_abort_txs(kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_queued = 0; - tx->tx_waiting = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, &zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kibnal_txlist_done(&zombies, -ECONNABORTED); -} - -void -kibnal_conn_disconnected(kib_conn_t *conn) -{ - static IB_QP_ATTRIBUTES_MODIFY qpam = {.RequestState = QPStateError}; - - FSTATUS frc; - - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP); - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTED); - - /* move QP to error state to make posted work items complete */ - frc = iba_modify_qp(conn->ibc_qp, &qpam, NULL); - if (frc != FSUCCESS) - CERROR("can't move qp state to error: %d\n", frc); - - /* Complete all tx descs not waiting for sends to complete. 
- * NB we should be safe from RDMA now that the QP has changed state */ - - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_active_txs); - - kibnal_handle_early_rxs(conn); -} - -void -kibnal_peer_connect_failed (kib_peer_t *peer, int type, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - LASSERT (error != 0); - LASSERT (!in_interrupt()); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - LASSERT (kibnal_peer_connecting(peer)); - - switch (type) { - case IBNAL_CONN_ACTIVE: - LASSERT (peer->ibp_connecting > 0); - peer->ibp_connecting--; - break; - - case IBNAL_CONN_PASSIVE: - LASSERT (peer->ibp_accepting > 0); - peer->ibp_accepting--; - break; - - case IBNAL_CONN_WAITING: - /* Can't assert; I might be racing with a successful connection - * which clears passivewait */ - peer->ibp_passivewait = 0; - break; - default: - LBUG(); - } - - if (kibnal_peer_connecting(peer) || /* another attempt underway */ - !list_empty(&peer->ibp_conns)) { /* got connected */ - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return; - } - - /* Say when active connection can be re-attempted */ - peer->ibp_reconnect_interval *= 2; - peer->ibp_reconnect_interval = - MAX(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_min_reconnect_interval); - peer->ibp_reconnect_interval = - MIN(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_max_reconnect_interval); - - peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval * HZ; - - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kibnal_peer_active(peer) && - peer->ibp_persistence == 0) { - /* failed connection attempt on non-persistent peer */ - kibnal_unlink_peer_locked (peer); - } - - peer->ibp_error = error; - - 
write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_peer_notify(peer); - - if (list_empty (&zombies)) - return; - - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_txlist_done (&zombies, -EHOSTUNREACH); -} - -void -kibnal_connreq_done (kib_conn_t *conn, int type, int status) -{ - kib_peer_t *peer = conn->ibc_peer; - struct list_head txs; - kib_tx_t *tx; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (type == IBNAL_CONN_ACTIVE || type == IBNAL_CONN_PASSIVE); - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP); - LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED); - LASSERT (kibnal_peer_connecting(peer)); - - LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars)); - conn->ibc_cvars = NULL; - - if (status != 0) { - /* failed to establish connection */ - kibnal_peer_connect_failed(conn->ibc_peer, type, status); - kibnal_conn_disconnected(conn); - kibnal_conn_decref(conn); /* Lose CM's ref */ - return; - } - - /* connection established */ - LASSERT(conn->ibc_state == IBNAL_CONN_CONNECTING); - - conn->ibc_last_send = jiffies; - kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED); - kibnal_peer_alive(peer); - - CDEBUG(D_NET, "Connection %s ESTABLISHED\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - peer->ibp_passivewait = 0; /* not waiting (got conn now) */ - kibnal_conn_addref(conn); /* +1 ref for ibc_list */ - list_add_tail(&conn->ibc_list, &peer->ibp_conns); - - if (!kibnal_peer_active(peer)) { - /* peer has been deleted */ - kibnal_close_conn_locked(conn, -ECONNABORTED); - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_peer_connect_failed(conn->ibc_peer, type, -ECONNABORTED); - kibnal_conn_decref(conn); /* lose CM's ref */ - return; - } - - switch (type) { - case IBNAL_CONN_ACTIVE: - LASSERT (peer->ibp_connecting > 0); - peer->ibp_connecting--; - break; - - case 
IBNAL_CONN_PASSIVE: - LASSERT (peer->ibp_accepting > 0); - peer->ibp_accepting--; - break; - default: - LBUG(); - } - - peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */ - - /* Nuke any dangling conns from a different peer instance... */ - kibnal_close_stale_conns_locked(peer, conn->ibc_incarnation); - - /* grab txs blocking for a conn */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* Schedule blocked txs */ - spin_lock (&conn->ibc_lock); - while (!list_empty (&txs)) { - tx = list_entry (txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - kibnal_queue_tx_locked (tx, conn); - } - spin_unlock (&conn->ibc_lock); - kibnal_check_sends (conn); -} - -void -kibnal_reject (lnet_nid_t nid, IB_HANDLE cep, int why) -{ - static CM_REJECT_INFO msgs[3]; - CM_REJECT_INFO *msg = &msgs[why]; - FSTATUS frc; - - LASSERT (why >= 0 && why < sizeof(msgs)/sizeof(msgs[0])); - - /* If I wasn't so lazy, I'd initialise this only once; it's effectively - * read-only... */ - msg->Reason = RC_USER_REJ; - msg->PrivateData[0] = (IBNAL_MSG_MAGIC) & 0xff; - msg->PrivateData[1] = (IBNAL_MSG_MAGIC >> 8) & 0xff; - msg->PrivateData[2] = (IBNAL_MSG_MAGIC >> 16) & 0xff; - msg->PrivateData[3] = (IBNAL_MSG_MAGIC >> 24) & 0xff; - msg->PrivateData[4] = (IBNAL_MSG_VERSION) & 0xff; - msg->PrivateData[5] = (IBNAL_MSG_VERSION >> 8) & 0xff; - msg->PrivateData[6] = why; - - frc = iba_cm_reject(cep, msg); - if (frc != FSUCCESS) - CERROR("Error %d rejecting %s\n", frc, libcfs_nid2str(nid)); -} - -void -kibnal_check_connreject(kib_conn_t *conn, int type, CM_REJECT_INFO *rej) -{ - kib_peer_t *peer = conn->ibc_peer; - unsigned long flags; - int magic; - int version; - int why; - - LASSERT (type == IBNAL_CONN_ACTIVE || - type == IBNAL_CONN_PASSIVE); - - CDEBUG(D_NET, "%s connection with %s rejected: %d\n", - (type == IBNAL_CONN_ACTIVE) ? 
"Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), rej->Reason); - - switch (rej->Reason) { - case RC_STALE_CONN: - if (type == IBNAL_CONN_PASSIVE) { - CERROR("Connection to %s rejected (stale QP)\n", - libcfs_nid2str(peer->ibp_nid)); - } else { - CWARN("Connection from %s rejected (stale QP): " - "retrying...\n", libcfs_nid2str(peer->ibp_nid)); - - /* retry from scratch to allocate a new conn - * which will use a different QP */ - kibnal_schedule_active_connect(peer, peer->ibp_version); - } - - /* An FCM_DISCONNECTED callback is still outstanding: give it a - * ref since kibnal_connreq_done() drops the CM's ref on conn - * on failure */ - kibnal_conn_addref(conn); - break; - - case RC_USER_REJ: - magic = (rej->PrivateData[0]) | - (rej->PrivateData[1] << 8) | - (rej->PrivateData[2] << 16) | - (rej->PrivateData[3] << 24); - version = (rej->PrivateData[4]) | - (rej->PrivateData[5] << 8); - why = (rej->PrivateData[6]); - - /* retry with old proto version */ - if (magic == IBNAL_MSG_MAGIC && - version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - conn->ibc_version == IBNAL_MSG_VERSION && - type != IBNAL_CONN_PASSIVE) { - /* retry with a new conn */ - CWARN ("Connection to %s refused: " - "retrying with old protocol version 0x%x\n", - libcfs_nid2str(peer->ibp_nid), version); - kibnal_schedule_active_connect(peer, version); - break; - } - - if (magic != IBNAL_MSG_MAGIC || - version != IBNAL_MSG_VERSION) { - CERROR("%s connection with %s rejected " - "(magic/ver %08x/%d why %d): " - "incompatible protocol\n", - (type == IBNAL_CONN_ACTIVE) ? 
- "Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), - magic, version, why); - break; - } - - if (type == IBNAL_CONN_ACTIVE && - why == IBNAL_REJECT_CONN_RACE) { - /* lost connection race */ - CWARN("Connection to %s rejected: " - "lost connection race\n", - libcfs_nid2str(peer->ibp_nid)); - - write_lock_irqsave(&kibnal_data.kib_global_lock, - flags); - - if (list_empty(&peer->ibp_conns)) { - peer->ibp_passivewait = 1; - peer->ibp_passivewait_deadline = - jiffies + - (*kibnal_tunables.kib_timeout * HZ); - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - break; - } - - CERROR("%s connection with %s rejected: %d\n", - (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), why); - break; - - default: - CERROR("%s connection with %s rejected: %d\n", - (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), rej->Reason); - } - - kibnal_connreq_done(conn, type, -ECONNREFUSED); -} - -void -kibnal_cm_disconnect_callback(kib_conn_t *conn, CM_CONN_INFO *info) -{ - CDEBUG(D_NET, "%s: state %d, status 0x%x\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - conn->ibc_state, info->Status); - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - switch (info->Status) { - default: - LBUG(); - break; - - case FCM_DISCONNECT_REQUEST: - /* Schedule conn to iba_cm_disconnect() if it wasn't already */ - kibnal_close_conn (conn, 0); - break; - - case FCM_DISCONNECT_REPLY: /* peer acks my disconnect req */ - case FCM_DISCONNECTED: /* end of TIME_WAIT */ - CDEBUG(D_NET, "Connection %s disconnected.\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_conn_decref(conn); /* Lose CM's ref */ - break; - } -} - -void -kibnal_cm_passive_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) -{ - kib_conn_t *conn = arg; - - CDEBUG(D_NET, "status 0x%x\n", info->Status); - - /* Established Connection Notifier */ - switch (info->Status) { - default: - CERROR("Unexpected status %d on Connection %s\n", - 
info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - LBUG(); - break; - - case FCM_CONNECT_TIMEOUT: - kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ETIMEDOUT); - break; - - case FCM_CONNECT_REJECT: - kibnal_check_connreject(conn, IBNAL_CONN_PASSIVE, - &info->Info.Reject); - break; - - case FCM_CONNECT_ESTABLISHED: - kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, 0); - break; - - case FCM_DISCONNECT_REQUEST: - case FCM_DISCONNECT_REPLY: - case FCM_DISCONNECTED: - kibnal_cm_disconnect_callback(conn, info); - break; - } -} - -int -kibnal_accept (kib_conn_t **connp, IB_HANDLE cep, kib_msg_t *msg, int nob) -{ - lnet_nid_t nid; - kib_conn_t *conn; - kib_peer_t *peer; - kib_peer_t *peer2; - unsigned long flags; - int rc; - - rc = kibnal_unpack_msg(msg, 0, nob); - if (rc != 0) { - /* SILENT! kibnal_unpack_msg() complains if required */ - kibnal_reject(LNET_NID_ANY, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - nid = msg->ibm_srcnid; - - if (msg->ibm_version != IBNAL_MSG_VERSION) - CWARN("Connection from %s: old protocol version 0x%x\n", - libcfs_nid2str(nid), msg->ibm_version); - - if (msg->ibm_type != IBNAL_MSG_CONNREQ) { - CERROR("Can't accept %s: bad request type %d (%d expected)\n", - libcfs_nid2str(nid), msg->ibm_type, IBNAL_MSG_CONNREQ); - kibnal_reject(nid, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - if (msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid) { - CERROR("Can't accept %s: bad dst NID %s (%s expected)\n", - libcfs_nid2str(nid), - libcfs_nid2str(msg->ibm_dstnid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - kibnal_reject(nid, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE || - msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE || - msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n", - libcfs_nid2str(nid), - msg->ibm_u.connparams.ibcp_queue_depth, - 
msg->ibm_u.connparams.ibcp_max_msg_size, - msg->ibm_u.connparams.ibcp_max_frags, - IBNAL_MSG_QUEUE_SIZE, - IBNAL_MSG_SIZE, - IBNAL_MAX_RDMA_FRAGS); - kibnal_reject(nid, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - conn = kibnal_create_conn(nid, msg->ibm_version); - if (conn == NULL) { - kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES); - return -ENOMEM; - } - - /* assume 'nid' is a new peer */ - rc = kibnal_create_peer(&peer, nid); - if (rc != 0) { - kibnal_conn_decref(conn); - kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES); - return -ENOMEM; - } - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (kibnal_data.kib_listener_cep == NULL) { /* shutdown started */ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_peer_decref(peer); - kibnal_conn_decref(conn); - kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES); - return -ESHUTDOWN; - } - - peer2 = kibnal_find_peer_locked(nid); - if (peer2 == NULL) { - /* peer table takes my ref on peer */ - list_add_tail (&peer->ibp_list, kibnal_nid2peerlist(nid)); - LASSERT (peer->ibp_connecting == 0); - } else { - kibnal_peer_decref(peer); - peer = peer2; - - if (peer->ibp_connecting != 0 && - peer->ibp_nid < kibnal_data.kib_ni->ni_nid) { - /* Resolve concurrent connection attempts in favour of - * the higher NID */ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - kibnal_conn_decref(conn); - kibnal_reject(nid, cep, IBNAL_REJECT_CONN_RACE); - return -EALREADY; - } - } - - kibnal_peer_addref(peer); /* +1 ref for conn */ - peer->ibp_accepting++; - - kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING); - conn->ibc_peer = peer; - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - *connp = conn; - return 0; -} - -void 
-kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) -{ - - CM_REQUEST_INFO *req = &info->Info.Request; - CM_REPLY_INFO *rep; - kib_conn_t *conn; - FSTATUS frc; - int rc; - - LASSERT(arg == NULL); /* no conn yet for passive */ - - CDEBUG(D_NET, "%x\n", info->Status); - - if (info->Status == FCM_CONNECT_CANCEL) { - up(&kibnal_data.kib_listener_signal); - return; - } - - LASSERT (info->Status == FCM_CONNECT_REQUEST); - - rc = kibnal_accept(&conn, cep, (kib_msg_t *)req->PrivateData, - CM_REQUEST_INFO_USER_LEN); - if (rc != 0) /* kibnal_accept has rejected */ - return; - - conn->ibc_cvars->cv_path = req->PathInfo.Path; - - rc = kibnal_conn_rts(conn, - req->CEPInfo.QPN, - req->CEPInfo.OfferedInitiatorDepth, - req->CEPInfo.OfferedResponderResources, - req->CEPInfo.StartingPSN); - if (rc != 0) { - kibnal_reject(conn->ibc_peer->ibp_nid, cep, - IBNAL_REJECT_NO_RESOURCES); - kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED); - return; - } - - memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci)); - rep = &conn->ibc_cvars->cv_cmci.Info.Reply; - - rep->QPN = conn->ibc_cvars->cv_qpattrs.QPNumber; - rep->QKey = conn->ibc_cvars->cv_qpattrs.Qkey; - rep->StartingPSN = conn->ibc_cvars->cv_qpattrs.RecvPSN; - rep->EndToEndFlowControl = conn->ibc_cvars->cv_qpattrs.FlowControl; - rep->ArbInitiatorDepth = conn->ibc_cvars->cv_qpattrs.InitiatorDepth; - rep->ArbResponderResources = conn->ibc_cvars->cv_qpattrs.ResponderResources; - rep->TargetAckDelay = kibnal_data.kib_hca_attrs.LocalCaAckDelay; - rep->FailoverAccepted = IBNAL_FAILOVER_ACCEPTED; - rep->RnRRetryCount = req->CEPInfo.RnrRetryCount; - - CLASSERT (CM_REPLY_INFO_USER_LEN >= - offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)); - - kibnal_pack_connmsg((kib_msg_t *)rep->PrivateData, - conn->ibc_version, - CM_REPLY_INFO_USER_LEN, - IBNAL_MSG_CONNACK, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation); - - LASSERT (conn->ibc_cep == NULL); - kibnal_set_conn_state(conn, 
IBNAL_CONN_CONNECTING); - - frc = iba_cm_accept(cep, - &conn->ibc_cvars->cv_cmci, - NULL, - kibnal_cm_passive_callback, conn, - &conn->ibc_cep); - - if (frc == FSUCCESS || frc == FPENDING) - return; - - CERROR("iba_cm_accept(%s) failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED); -} - -void -kibnal_check_connreply(kib_conn_t *conn, CM_REPLY_INFO *rep) -{ - kib_msg_t *msg = (kib_msg_t *)rep->PrivateData; - lnet_nid_t nid = conn->ibc_peer->ibp_nid; - FSTATUS frc; - int rc; - - rc = kibnal_unpack_msg(msg, conn->ibc_version, CM_REPLY_INFO_USER_LEN); - if (rc != 0) { - CERROR ("Error %d unpacking connack from %s\n", - rc, libcfs_nid2str(nid)); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO); - return; - } - - if (msg->ibm_type != IBNAL_MSG_CONNACK) { - CERROR("Bad connack request type %d (%d expected) from %s\n", - msg->ibm_type, IBNAL_MSG_CONNREQ, - libcfs_nid2str(msg->ibm_srcnid)); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO); - return; - } - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR("Stale connack from %s(%s): %s(%s), "LPX64"("LPX64")\n", - libcfs_nid2str(msg->ibm_srcnid), - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(msg->ibm_dstnid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid), - msg->ibm_dststamp, kibnal_data.kib_incarnation); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ESTALE); - return; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE || - msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE || - msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n", - 
libcfs_nid2str(msg->ibm_srcnid), - msg->ibm_u.connparams.ibcp_queue_depth, - msg->ibm_u.connparams.ibcp_max_msg_size, - msg->ibm_u.connparams.ibcp_max_frags, - IBNAL_MSG_QUEUE_SIZE, - IBNAL_MSG_SIZE, - IBNAL_MAX_RDMA_FRAGS); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO); - return; - } - - CDEBUG(D_NET, "Connection %s REP_RECEIVED.\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - rc = kibnal_conn_rts(conn, - rep->QPN, - rep->ArbInitiatorDepth, - rep->ArbResponderResources, - rep->StartingPSN); - if (rc != 0) { - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_NO_RESOURCES); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EIO); - return; - } - - memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci)); - - frc = iba_cm_accept(conn->ibc_cep, - &conn->ibc_cvars->cv_cmci, - NULL, NULL, NULL, NULL); - - if (frc == FCM_CONNECT_ESTABLISHED) { - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, 0); - return; - } - - CERROR("Connection %s CMAccept failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ECONNABORTED); -} - -void -kibnal_cm_active_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) -{ - kib_conn_t *conn = arg; - - CDEBUG(D_NET, "status 0x%x\n", info->Status); - - switch (info->Status) { - default: - CERROR("unknown status %d on Connection %s\n", - info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - LBUG(); - break; - - case FCM_CONNECT_TIMEOUT: - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ETIMEDOUT); - break; - - case FCM_CONNECT_REJECT: - kibnal_check_connreject(conn, IBNAL_CONN_ACTIVE, - &info->Info.Reject); - break; - - case FCM_CONNECT_REPLY: - kibnal_check_connreply(conn, &info->Info.Reply); - 
break; - - case FCM_DISCONNECT_REQUEST: - case FCM_DISCONNECT_REPLY: - case FCM_DISCONNECTED: - kibnal_cm_disconnect_callback(conn, info); - break; - } -} - -void -dump_path_records(PATH_RESULTS *results) -{ - IB_PATH_RECORD *path; - int i; - - for (i = 0; i < results->NumPathRecords; i++) { - path = &results->PathRecords[i]; - CDEBUG(D_NET, "%d: sgid "LPX64":"LPX64" dgid " - LPX64":"LPX64" pkey %x\n", - i, - path->SGID.Type.Global.SubnetPrefix, - path->SGID.Type.Global.InterfaceID, - path->DGID.Type.Global.SubnetPrefix, - path->DGID.Type.Global.InterfaceID, - path->P_Key); - } -} - -void -kibnal_pathreq_callback (void *arg, QUERY *qry, - QUERY_RESULT_VALUES *qrslt) -{ - IB_CA_ATTRIBUTES *ca_attr = &kibnal_data.kib_hca_attrs; - kib_conn_t *conn = arg; - CM_REQUEST_INFO *req = &conn->ibc_cvars->cv_cmci.Info.Request; - PATH_RESULTS *path = (PATH_RESULTS *)qrslt->QueryResult; - FSTATUS frc; - - if (qrslt->Status != FSUCCESS || - qrslt->ResultDataSize < sizeof(*path)) { - CDEBUG (D_NETERROR, "pathreq %s failed: status %d data size %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - qrslt->Status, qrslt->ResultDataSize); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - if (path->NumPathRecords < 1) { - CDEBUG (D_NETERROR, "pathreq %s failed: no path records\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - //dump_path_records(path); - conn->ibc_cvars->cv_path = path->PathRecords[0]; - - LASSERT (conn->ibc_cep == NULL); - - conn->ibc_cep = kibnal_create_cep(conn->ibc_peer->ibp_nid); - if (conn->ibc_cep == NULL) { - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ENOMEM); - return; - } - - memset(req, 0, sizeof(*req)); - req->SID = conn->ibc_cvars->cv_svcrec.RID.ServiceID; - req->CEPInfo.CaGUID = kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx]; - req->CEPInfo.EndToEndFlowControl = IBNAL_EE_FLOW; - req->CEPInfo.PortGUID = 
conn->ibc_cvars->cv_path.SGID.Type.Global.InterfaceID; - req->CEPInfo.RetryCount = IBNAL_RETRY; - req->CEPInfo.RnrRetryCount = IBNAL_RNR_RETRY; - req->CEPInfo.AckTimeout = IBNAL_ACK_TIMEOUT; - req->CEPInfo.StartingPSN = IBNAL_STARTING_PSN; - req->CEPInfo.QPN = conn->ibc_cvars->cv_qpattrs.QPNumber; - req->CEPInfo.QKey = conn->ibc_cvars->cv_qpattrs.Qkey; - req->CEPInfo.OfferedResponderResources = ca_attr->MaxQPResponderResources; - req->CEPInfo.OfferedInitiatorDepth = ca_attr->MaxQPInitiatorDepth; - req->PathInfo.bSubnetLocal = IBNAL_LOCAL_SUB; - req->PathInfo.Path = conn->ibc_cvars->cv_path; - - CLASSERT (CM_REQUEST_INFO_USER_LEN >= - offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)); - - kibnal_pack_connmsg((kib_msg_t *)req->PrivateData, - conn->ibc_version, - CM_REQUEST_INFO_USER_LEN, - IBNAL_MSG_CONNREQ, - conn->ibc_peer->ibp_nid, 0); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto test */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - ((kib_msg_t *)req->PrivateData)->ibm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - ((kib_msg_t *)req->PrivateData)->ibm_magic = - LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - /* Flag I'm getting involved with the CM... 
*/ - kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING); - - /* cm callback gets my conn ref */ - frc = iba_cm_connect(conn->ibc_cep, req, - kibnal_cm_active_callback, conn); - if (frc == FPENDING || frc == FSUCCESS) - return; - - CERROR ("Connect %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); -} - -void -kibnal_dump_service_records(SERVICE_RECORD_RESULTS *results) -{ - IB_SERVICE_RECORD *svc; - int i; - - for (i = 0; i < results->NumServiceRecords; i++) { - svc = &results->ServiceRecords[i]; - CDEBUG(D_NET, "%d: sid "LPX64" gid "LPX64":"LPX64" pkey %x\n", - i, - svc->RID.ServiceID, - svc->RID.ServiceGID.Type.Global.SubnetPrefix, - svc->RID.ServiceGID.Type.Global.InterfaceID, - svc->RID.ServiceP_Key); - } -} - -void -kibnal_service_get_callback (void *arg, QUERY *qry, - QUERY_RESULT_VALUES *qrslt) -{ - kib_conn_t *conn = arg; - SERVICE_RECORD_RESULTS *svc; - FSTATUS frc; - - if (qrslt->Status != FSUCCESS || - qrslt->ResultDataSize < sizeof(*svc)) { - CDEBUG (D_NETERROR, "Lookup %s failed: status %d data size %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - qrslt->Status, qrslt->ResultDataSize); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - svc = (SERVICE_RECORD_RESULTS *)qrslt->QueryResult; - if (svc->NumServiceRecords < 1) { - CDEBUG (D_NETERROR, "lookup %s failed: no service records\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - //kibnal_dump_service_records(svc); - conn->ibc_cvars->cv_svcrec = svc->ServiceRecords[0]; - - qry = &conn->ibc_cvars->cv_query; - memset(qry, 0, sizeof(*qry)); - - qry->OutputType = OutputTypePathRecord; - qry->InputType = InputTypePortGuidPair; - - qry->InputValue.PortGuidPair.SourcePortGuid = - kibnal_data.kib_port_guid; - qry->InputValue.PortGuidPair.DestPortGuid = - conn->ibc_cvars->cv_svcrec.RID.ServiceGID.Type.Global.InterfaceID; - 
- /* kibnal_pathreq_callback gets my conn ref */ - frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - qry, - kibnal_pathreq_callback, - &kibnal_data.kib_sdretry, - conn); - if (frc == FPENDING) - return; - - CERROR ("pathreq %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); -} - -void -kibnal_connect_peer (kib_peer_t *peer) -{ - QUERY *qry; - FSTATUS frc; - kib_conn_t *conn; - - LASSERT (peer->ibp_connecting != 0); - - conn = kibnal_create_conn(peer->ibp_nid, peer->ibp_version); - if (conn == NULL) { - CERROR ("Can't allocate conn\n"); - kibnal_peer_connect_failed(peer, IBNAL_CONN_ACTIVE, -ENOMEM); - return; - } - - conn->ibc_peer = peer; - kibnal_peer_addref(peer); - - qry = &conn->ibc_cvars->cv_query; - memset(qry, 0, sizeof(*qry)); - - qry->OutputType = OutputTypeServiceRecord; - qry->InputType = InputTypeServiceRecord; - - qry->InputValue.ServiceRecordValue.ComponentMask = - KIBNAL_SERVICE_KEY_MASK; - kibnal_set_service_keys( - &qry->InputValue.ServiceRecordValue.ServiceRecord, - peer->ibp_nid); - - /* kibnal_service_get_callback gets my conn ref */ - frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - qry, - kibnal_service_get_callback, - &kibnal_data.kib_sdretry, - conn); - if (frc == FPENDING) - return; - - CERROR("Lookup %s failed: %d\n", libcfs_nid2str(peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); -} - -int -kibnal_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - int timed_out = 0; - - spin_lock(&conn->ibc_lock); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - 
break; - } - } - - spin_unlock(&conn->ibc_lock); - return timed_out; -} - -int -kibnal_conn_timed_out (kib_conn_t *conn) -{ - return kibnal_check_txs(conn, &conn->ibc_tx_queue) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kibnal_check_txs(conn, &conn->ibc_active_txs); -} - -void -kibnal_check_peers (int idx) -{ - rwlock_t *rwlock = &kibnal_data.kib_global_lock; - struct list_head *peers = &kibnal_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... */ - read_lock_irqsave(rwlock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - if (peer->ibp_passivewait) { - LASSERT (list_empty(&peer->ibp_conns)); - - if (!time_after_eq(jiffies, - peer->ibp_passivewait_deadline)) - continue; - - kibnal_peer_addref(peer); /* ++ ref for me... */ - read_unlock_irqrestore(rwlock, flags); - - kibnal_peer_connect_failed(peer, IBNAL_CONN_WAITING, - -ETIMEDOUT); - kibnal_peer_decref(peer); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - kibnal_check_sends(conn); - - if (!kibnal_conn_timed_out(conn)) - continue; - - /* Handle timeout by closing the whole connection. We - * can only be sure RDMA activity has ceased once the - * QP has been modified. */ - - kibnal_conn_addref(conn); /* 1 ref for me... 
*/ - - read_unlock_irqrestore(rwlock, flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_close_conn (conn, -ETIMEDOUT); - kibnal_conn_decref(conn); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(rwlock, flags); -} - -void -kibnal_disconnect_conn (kib_conn_t *conn) -{ - FSTATUS frc; - - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTING); - - kibnal_conn_disconnected(conn); - - frc = iba_cm_disconnect(conn->ibc_cep, NULL, NULL); - switch (frc) { - case FSUCCESS: - break; - - case FINSUFFICIENT_RESOURCES: - CERROR("ENOMEM disconnecting %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* This might cause the module to become unloadable since the - * FCM_DISCONNECTED callback is still outstanding */ - break; - - default: - CERROR("Unexpected error disconnecting %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - LBUG(); - } - - kibnal_peer_notify(conn->ibc_peer); -} - -int -kibnal_connd (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_conn_t *conn; - kib_peer_t *peer; - int timeout; - int i; - int did_something; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kibnal_connd"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - while (!kibnal_data.kib_shutdown) { - did_something = 0; - - if (!list_empty (&kibnal_data.kib_connd_zombies)) { - conn = list_entry (kibnal_data.kib_connd_zombies.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - - kibnal_destroy_conn(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_conns)) { - conn = list_entry (kibnal_data.kib_connd_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - spin_unlock_irqrestore 
(&kibnal_data.kib_connd_lock, flags); - did_something = 1; - - kibnal_disconnect_conn(conn); - kibnal_conn_decref(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_peers)) { - peer = list_entry (kibnal_data.kib_connd_peers.next, - kib_peer_t, ibp_connd_list); - - list_del_init (&peer->ibp_connd_list); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - - kibnal_connect_peer (peer); - kibnal_peer_decref (peer); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - /* careful with the jiffy wrap... */ - while ((timeout = (int)(deadline - jiffies)) <= 0) { - const int n = 4; - const int p = 1; - int chunk = kibnal_data.kib_peer_hash_size; - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
*/ - - if (*kibnal_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kibnal_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kibnal_check_peers (peer_index); - peer_index = (peer_index + 1) % - kibnal_data.kib_peer_hash_size; - } - - deadline += p * HZ; - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - } - - if (did_something) - continue; - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - - if (!kibnal_data.kib_shutdown && - list_empty (&kibnal_data.kib_connd_conns) && - list_empty (&kibnal_data.kib_connd_peers)) - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_thread_fini (); - return (0); -} - - -void -kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev) -{ - /* XXX flesh out. 
this seems largely for async errors */ - CERROR("type: %d code: %u\n", ev->EventType, ev->EventCode); -} - -void -kibnal_hca_callback (void *hca_arg, void *cq_arg) -{ - unsigned long flags; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); -} - -int -kibnal_scheduler(void *arg) -{ - long id = (long)arg; - wait_queue_t wait; - char name[16]; - FSTATUS frc; - FSTATUS frc2; - IB_WORK_COMPLETION wc; - kib_rx_t *rx; - unsigned long flags; - __u64 rxseq = 0; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kibnal_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - - while (!kibnal_data.kib_shutdown) { - if (busy_loops++ >= IBNAL_RESCHED) { - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - if (kibnal_data.kib_ready && - !kibnal_data.kib_checking_cq) { - /* take ownership of completion polling */ - kibnal_data.kib_checking_cq = 1; - /* Assume I'll exhaust the CQ */ - kibnal_data.kib_ready = 0; - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - frc = iba_poll_cq(kibnal_data.kib_cq, &wc); - if (frc == FNOT_DONE) { - /* CQ empty */ - frc2 = iba_rearm_cq(kibnal_data.kib_cq, - CQEventSelNextWC); - LASSERT (frc2 == FSUCCESS); - } - - if (frc == FSUCCESS && - kibnal_wreqid2type(wc.WorkReqId) == IBNAL_WID_RX) { - rx = (kib_rx_t *)kibnal_wreqid2ptr(wc.WorkReqId); - - /* Grab the RX sequence number NOW before - * anyone else can get an RX completion */ - rxseq = rx->rx_conn->ibc_rxseq++; - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - /* give up ownership of completion polling */ - kibnal_data.kib_checking_cq = 0; - - if (frc == FNOT_DONE) - continue; - - LASSERT (frc == 
FSUCCESS); - /* Assume there's more: get another scheduler to check - * while I handle this completion... */ - - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - switch (kibnal_wreqid2type(wc.WorkReqId)) { - case IBNAL_WID_RX: - kibnal_rx_complete(&wc, rxseq); - break; - - case IBNAL_WID_TX: - kibnal_tx_complete(&wc); - break; - - case IBNAL_WID_RDMA: - /* We only get RDMA completion notification if - * it fails. So we just ignore them completely - * because... - * - * 1) If an RDMA fails, all subsequent work - * items, including the final SEND will fail - * too, so I'm still guaranteed to notice that - * this connection is hosed. - * - * 2) It's positively dangerous to look inside - * the tx descriptor obtained from an RDMA work - * item. As soon as I drop the kib_sched_lock, - * I give a scheduler on another CPU a chance - * to get the final SEND completion, so the tx - * descriptor can get freed as I inspect it. */ - CERROR ("RDMA failed: %d\n", wc.Status); - break; - - default: - LBUG(); - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - continue; - } - - /* Nothing to do; sleep... 
*/ - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kibnal_data.kib_sched_waitq, &wait); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - schedule(); - - remove_wait_queue(&kibnal_data.kib_sched_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - - kibnal_thread_fini(); - return (0); -} diff --git a/lnet/klnds/iiblnd/iiblnd_modparams.c b/lnet/klnds/iiblnd/iiblnd_modparams.c deleted file mode 100644 index 908314112ceded7b03cd07764763e949fca0ec04..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd_modparams.c +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "iiblnd.h" - -static char *ipif_basename = "ib"; -CFS_MODULE_PARM(ipif_basename, "s", charp, 0444, - "IPoIB interface base name"); - -static char *service_name = "iiblnd"; -CFS_MODULE_PARM(service_name, "s", charp, 0444, - "IB service name"); - -static int service_number = 0x11b9a2; -CFS_MODULE_PARM(service_number, "i", int, 0444, - "IB service number"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int concurrent_peers = 1152; -CFS_MODULE_PARM(concurrent_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int sd_retries = 8; -CFS_MODULE_PARM(sd_retries, "i", int, 0444, - "# times to retry SD queries"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in seconds before sending a keepalive"); - -static int concurrent_sends = IBNAL_RX_MSGS; -CFS_MODULE_PARM(concurrent_sends, "i", int, 0644, - "Send work queue sizing"); - -kib_tunables_t kibnal_tunables = { - .kib_ipif_basename = &ipif_basename, - .kib_service_name = &service_name, - .kib_service_number = &service_number, - .kib_min_reconnect_interval = &min_reconnect_interval, - .kib_max_reconnect_interval = &max_reconnect_interval, - .kib_concurrent_peers = 
&concurrent_peers, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_keepalive = &keepalive, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_sd_retries = &sd_retries, - .kib_concurrent_sends = &concurrent_sends, -}; - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - -/* NB max_size specified for proc_dostring entries only needs to be big enough - * not to truncate the printout; it only needs to be the actual size of the - * string buffer if we allow writes (and we don't) */ - -static cfs_sysctl_table_t kibnal_ctl_table[] = { - { - .ctl_name = 1, - .procname = "ipif_basename", - .data = &ipif_basename, - .maxlen = 1024, - .mode = 0444, - .proc_handler = &proc_dostring - }, - { - .ctl_name = 2, - .procname = "service_name", - .data = &service_name, - .maxlen = 1024, - .mode = 0444, - .proc_handler = &proc_dostring - }, - { - .ctl_name = 3, - .procname = "service_number", - .data = &service_number, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "min_reconnect_interval", - .data = &min_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 5, - .procname = "max_reconnect_interval", - .data = &max_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "concurrent_peers", - .data = &concurrent_peers, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "cksum", - .data = &cksum, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 8, - .procname = "timeout", - .data = &timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 9, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 
10, - .procname = "credits", - .data = &credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 11, - .procname = "peer_credits", - .data = &peer_credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 12, - .procname = "sd_retries", - .data = &sd_retries, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 13, - .procname = "keepalive", - .data = &keepalive, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 14, - .procname = "concurrent_sends", - .data = &concurrent_sends, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - {0} -}; - -static cfs_sysctl_table_t kibnal_top_ctl_table[] = { - { - .ctl_name = 203, - .procname = "openibnal", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kibnal_ctl_table - }, - {0} -}; - -int -kibnal_tunables_init () -{ - kibnal_tunables.kib_sysctl = - cfs_register_sysctl_table(kibnal_top_ctl_table, 0); - - if (kibnal_tunables.kib_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - if (*kibnal_tunables.kib_concurrent_sends > IBNAL_RX_MSGS) - *kibnal_tunables.kib_concurrent_sends = IBNAL_RX_MSGS; - if (*kibnal_tunables.kib_concurrent_sends < IBNAL_MSG_QUEUE_SIZE) - *kibnal_tunables.kib_concurrent_sends = IBNAL_MSG_QUEUE_SIZE; - - return 0; -} - -void -kibnal_tunables_fini () -{ - if (kibnal_tunables.kib_sysctl != NULL) - cfs_unregister_sysctl_table(kibnal_tunables.kib_sysctl); -} - -#else - -int -kibnal_tunables_init () -{ - return 0; -} - -void -kibnal_tunables_fini () -{ -} - -#endif diff --git a/lnet/klnds/mxlnd/.cvsignore b/lnet/klnds/mxlnd/.cvsignore deleted file mode 100644 index 26bf56c4c271a5a4da943cb6c1beaebe3c8bdf49..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c 
-.*.flags -.tmp_versions -.depend - diff --git a/lnet/klnds/mxlnd/Makefile.in b/lnet/klnds/mxlnd/Makefile.in deleted file mode 100644 index 378dbdd346591d0a9ac89ae7d651ed2c8b40e9b8..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kmxlnd -kmxlnd-objs := mxlnd.o mxlnd_cb.o mxlnd_modparams.o - -EXTRA_POST_CFLAGS := @MXCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/mxlnd/README b/lnet/klnds/mxlnd/README deleted file mode 100644 index eb796088d687ee2690028668b1f6dd4af6077aa2..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/README +++ /dev/null @@ -1,190 +0,0 @@ -************************************************************************* -* * -* Myrinet Express Lustre Networking Driver (MXLND) documentation * -* * -************************************************************************* - -README of MXLND - -MXLND provides support for Myricom's Myrinet Express (MX) communication -layer in Lustre. - -MXLND may be used with either MX-10G or MX-2G. See MX's README for -supported NICs. - -Table of Contents: - I. Installation - 1. Configuring and compiling - 2. Module Parameters - II. MXLND Performance - III. Caveats - 1. Systems with different page sizes - 2. Multi-homing - 3. MX endpoint collision - IV. License - V. Support - -================ -I. Installation -================ - -MXLND is supported on Linux 2.6. It may be possible to run it on 2.4, -but it has not been tested. MXLND requires Myricom's MX version 1.2.1 -or higher. See MX's README for the supported list of processors. - -1. Configuring and compiling - -MXLND should be already integrated into the Lustre build process. To -build MXLND, you will need to set the path to your MX installation -in Lustre's ./configure: - - --with-mx=/opt/mx - -replacing /opt with the actual path. Configure will check to ensure that -the MX version has the required functions. If not, it will fail to build. 
-To check if MXLND built, look for: - - checking whether to enable Myrinet MX support... yes - -in configure's output or the presence of Makefile in -$LUSTRE/lnet/klnds/mxlnd. - -2. Module Parameters - -MXLND supports a number of load-time parameters using Linux's module -parameter system. On our test systems, we created the following file: - - /etc/modprobe.d/kmxlnd - -On some (older?) systems, you may need to modify /etc/modprobe.conf. - -The available options are: - - n_waitd # of completion daemons - max_peers maximum number of peers that may connect - cksum set non-zero to enable small message (< 4KB) checksums - ntx # of total tx message descriptors - credits # concurrent sends to a single peer - board index value of the Myrinet board (NIC) - ep_id MX endpoint ID - polling Use 0 to block (wait). A value > 0 will poll that many times before blocking - hosts IP-to-hostname resolution file - -Of these, only hosts is required. It must be the absolute path to the -MXLND hosts file. For example: - - options kmxlnd hosts=/etc/hosts.mxlnd - -The file format for the hosts file is as follows: - -IP HOST BOARD EP_ID - -The values must be space and/or tab separated where: - - IP is a valid IPv4 address - HOST is the name returned by `hostname` on that machine - BOARD is the index of the Myricom NIC (0 for the first card, etc.) - EP_ID is the MX endpoint ID - -You may want to vary the remaining options to obtain the optimal performance -for your platform. - - n_waitd sets the number of threads that process completed MX requests -(sends and receives). In our testing, the default of 1 performed best. - - max_peers tells MXLND the upper limit of machines that it will need to -communicate with. This affects how many receives it will pre-post and each -receive will use one page of memory. Ideally, on clients, this value will -be equal to the total number of Lustre servers (MDS and OSS). On servers, -it needs to equal the total number of machines in the storage system. 
- - cksum turns on small message checksums. It can be used to aid in trouble- -shooting. MX also provides an optional checksumming feature which can check -all messages (large and small). See the MX README for details. - - ntx is the number of total sends in flight from this machine. In actuality, -MXLND reserves half of them for connect messages so make this value twice as large -as you want for the total number of sends in flight. - - credits is the number of in-flight messages for a specific peer. This is part -of the flow-control system in Lustre. Increasing this value may improve performance -but it requires more memory since each message requires at least one page. - - board is the index of the Myricom NIC. Hosts can have multiple Myricom NICs -and this identifies which one MXLND should use. This value must match the board -value in your MXLND hosts file for this host. - - ep_id is the MX endpoint ID. Each process that uses MX is required to have at -least one MX endpoint to access the MX library and NIC. The ID is a simple index -starting at 0. This value must match the endpoint ID value in your MXLND hosts -file for this host. - - polling determines whether this host will poll or block for MX request com- -pletions. A value of 0 blocks and any positive value will poll that many times -before blocking. Since polling increases CPU usage, we suggest you set this to -0 on the client and experiment with different values for servers. - -===================== -II. MXLND Performance -===================== - -On MX-2G systems, MXLND should easily saturate the link and use minimal CPU -(5-10% for read and write operations). On MX-10G systems, MXLND can saturate -the link and use moderate CPU resources (20-30% for read and write operations). -MX-10G relies on PCI-Express which is relatively new and performance varies -considerably by processor, motherboard and PCI-E chipset. Refer to Myricom's -website for the latest DMA read/write performance results by motherboard. 
The -DMA results will place an upper-bound on MXLND performance. - -============ -III. Caveats -============ - -1. Systems with different page sizes - -MXLND will set the maximum small message size equal to the kernel's page size. -This means that machines running MXLND that have different page sizes are not -able to communicate with each other. If you wish to run MXLND in this case, -send email to help@myri.com. - -2. Multi-homing - -At this time, the MXLND does not support more than one interface at a time. -Thus, a single Lustre router cannot route between two MX-10G, between two -MX-2G, or between MX-10G and MX-2G fabrics. - -3. MX endpoint collision - -Each process that uses MX is required to have at least one MX endpoint to -access the MX library and NIC. Other processes may need to use MX and no two -processes can use the same endpoint ID. MPICH-MX dynamically chooses one at -MPI startup and should not interfere with MXLND. Sockets-MX, on the other hand, -is hard coded to use 0 for its ID. If it is possible that anyone will want to -run Sockets-MX on this system, use a non-0 value for MXLND's endpoint ID. - - -=========== -IV. License -=========== - -MXLND is copyright (C) 2006 of Myricom, Inc. - -MXLND is part of Lustre, http://www.lustre.org. - -MXLND is free software; you can redistribute it and/or modify it under the -terms of version 2 of the GNU General Public License as published by the Free -Software Foundation. - -MXLND is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, -Cambridge, MA 02139, USA. - -========== -V. Support -========== - -If you have questions about MXLND, please contact help@myri.com. 
diff --git a/lnet/klnds/mxlnd/autoMakefile.am b/lnet/klnds/mxlnd/autoMakefile.am deleted file mode 100644 index e5feea75fe25311fde5198c611c58e9b7ef99547..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_MXLND -modulenet_DATA = kmxlnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kmxlnd-objs:%.o=%.c) mxlnd.h mxlnd_wire.h diff --git a/lnet/klnds/mxlnd/mxlnd.c b/lnet/klnds/mxlnd/mxlnd.c deleted file mode 100644 index 141ad20e39e7edcc05e40a5b7132e57ae823863b..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd.c +++ /dev/null @@ -1,937 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "mxlnd.h" - -lnd_t the_kmxlnd = { - .lnd_type = MXLND, - .lnd_startup = mxlnd_startup, - .lnd_shutdown = mxlnd_shutdown, - .lnd_ctl = mxlnd_ctl, - .lnd_send = mxlnd_send, - .lnd_recv = mxlnd_recv, -}; - -kmx_data_t kmxlnd_data; - -/** - * mxlnd_ctx_free - free ctx struct - * @ctx - a kmx_peer pointer - * - * The calling function should remove the ctx from the ctx list first - * then free it. - */ -void -mxlnd_ctx_free(struct kmx_ctx *ctx) -{ - if (ctx == NULL) return; - - if (ctx->mxc_page != NULL) { - __free_page(ctx->mxc_page); - spin_lock(&kmxlnd_data.kmx_global_lock); - kmxlnd_data.kmx_mem_used -= MXLND_EAGER_SIZE; - spin_unlock(&kmxlnd_data.kmx_global_lock); - } - - if (ctx->mxc_seg_list != NULL) { - LASSERT(ctx->mxc_nseg > 0); - MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t)); - } - - MXLND_FREE (ctx, sizeof (*ctx)); - return; -} - -/** - * mxlnd_ctx_alloc - allocate and initialize a new ctx struct - * @ctxp - address of a kmx_ctx pointer - * - * Returns 0 on success and -EINVAL, -ENOMEM on failure - */ -int -mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type) -{ - int ret = 0; - struct kmx_ctx *ctx = NULL; - - if (ctxp == NULL) return -EINVAL; - - MXLND_ALLOC(ctx, sizeof (*ctx)); - if (ctx == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate ctx\n"); - return -ENOMEM; - } - memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->mxc_lock); - - ctx->mxc_type = type; - ctx->mxc_page = alloc_page (GFP_KERNEL); - if (ctx->mxc_page == NULL) { - CDEBUG(D_NETERROR, "Can't allocate page\n"); - ret = -ENOMEM; - goto failed; - } - spin_lock(&kmxlnd_data.kmx_global_lock); - kmxlnd_data.kmx_mem_used += MXLND_EAGER_SIZE; - spin_unlock(&kmxlnd_data.kmx_global_lock); - ctx->mxc_msg = (struct kmx_msg *)((char *)page_address(ctx->mxc_page)); - ctx->mxc_seg.segment_ptr = MX_PA_TO_U64(lnet_page2phys(ctx->mxc_page)); - ctx->mxc_state = MXLND_CTX_IDLE; - - *ctxp = ctx; - return 0; - -failed: - mxlnd_ctx_free(ctx); - return ret; -} - -/** 
- * mxlnd_ctx_init - reset ctx struct to the default values - * @ctx - a kmx_ctx pointer - */ -void -mxlnd_ctx_init(struct kmx_ctx *ctx) -{ - if (ctx == NULL) return; - - /* do not change mxc_type */ - ctx->mxc_incarnation = 0; - ctx->mxc_deadline = 0; - ctx->mxc_state = MXLND_CTX_IDLE; - /* ignore mxc_global_list */ - if (ctx->mxc_list.next != NULL && !list_empty(&ctx->mxc_list)) { - if (ctx->mxc_peer != NULL) spin_lock(&ctx->mxc_lock); - list_del_init(&ctx->mxc_list); - if (ctx->mxc_peer != NULL) spin_unlock(&ctx->mxc_lock); - } - /* ignore mxc_rx_list */ - /* ignore mxc_lock */ - ctx->mxc_nid = 0; - ctx->mxc_peer = NULL; - ctx->mxc_conn = NULL; - /* ignore mxc_msg */ - /* ignore mxc_page */ - ctx->mxc_lntmsg[0] = NULL; - ctx->mxc_lntmsg[1] = NULL; - ctx->mxc_msg_type = 0; - ctx->mxc_cookie = 0LL; - ctx->mxc_match = 0LL; - /* ctx->mxc_seg.segment_ptr points to mxc_page */ - ctx->mxc_seg.segment_length = 0; - if (ctx->mxc_seg_list != NULL) { - LASSERT(ctx->mxc_nseg > 0); - MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t)); - } - ctx->mxc_seg_list = NULL; - ctx->mxc_nseg = 0; - ctx->mxc_nob = 0; - ctx->mxc_mxreq = NULL; - memset(&ctx->mxc_status, 0, sizeof(mx_status_t)); - /* ctx->mxc_get */ - /* ctx->mxc_put */ - - ctx->mxc_msg->mxm_type = 0; - ctx->mxc_msg->mxm_credits = 0; - ctx->mxc_msg->mxm_nob = 0; - ctx->mxc_msg->mxm_seq = 0; - - return; -} - -/** - * mxlnd_free_txs - free kmx_txs and associated pages - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_txs(void) -{ - struct kmx_ctx *tx = NULL; - struct kmx_ctx *next = NULL; - - list_for_each_entry_safe(tx, next, &kmxlnd_data.kmx_txs, mxc_global_list) { - list_del_init(&tx->mxc_global_list); - mxlnd_ctx_free(tx); - } - return; -} - -/** - * mxlnd_init_txs - allocate tx descriptors then stash on txs and idle tx lists - * - * Called from mxlnd_startup() - * returns 0 on success, else -ENOMEM - */ -int -mxlnd_init_txs(void) -{ - int ret = 0; - int i = 0; - struct kmx_ctx *tx = NULL; - 
- for (i = 0; i < *kmxlnd_tunables.kmx_ntx; i++) { - ret = mxlnd_ctx_alloc(&tx, MXLND_REQ_TX); - if (ret != 0) { - mxlnd_free_txs(); - return ret; - } - mxlnd_ctx_init(tx); - /* in startup(), no locks required */ - list_add_tail(&tx->mxc_global_list, &kmxlnd_data.kmx_txs); - list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle); - } - return 0; -} - -/** - * mxlnd_free_rxs - free initial kmx_rx descriptors and associated pages - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_rxs(void) -{ - struct kmx_ctx *rx = NULL; - struct kmx_ctx *next = NULL; - - list_for_each_entry_safe(rx, next, &kmxlnd_data.kmx_rxs, mxc_global_list) { - list_del_init(&rx->mxc_global_list); - mxlnd_ctx_free(rx); - } - return; -} - -/** - * mxlnd_init_rxs - allocate initial rx descriptors - * - * Called from startup(). We create MXLND_MAX_PEERS plus MXLND_NTX - * rx descriptors. We create one for each potential peer to handle - * the initial connect request. We create on for each tx in case the - * send requires a non-eager receive. 
- * - * Returns 0 on success, else -ENOMEM - */ -int -mxlnd_init_rxs(void) -{ - int ret = 0; - int i = 0; - struct kmx_ctx *rx = NULL; - - for (i = 0; i < (*kmxlnd_tunables.kmx_ntx + *kmxlnd_tunables.kmx_max_peers); i++) { - ret = mxlnd_ctx_alloc(&rx, MXLND_REQ_RX); - if (ret != 0) { - mxlnd_free_rxs(); - return ret; - } - mxlnd_ctx_init(rx); - /* in startup(), no locks required */ - list_add_tail(&rx->mxc_global_list, &kmxlnd_data.kmx_rxs); - list_add_tail(&rx->mxc_list, &kmxlnd_data.kmx_rx_idle); - } - return 0; -} - -/** - * mxlnd_free_peers - free peers - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_peers(void) -{ - int i = 0; - struct kmx_peer *peer = NULL; - struct kmx_peer *next = NULL; - - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry_safe(peer, next, &kmxlnd_data.kmx_peers[i], mxp_peers) { - list_del_init(&peer->mxp_peers); - if (peer->mxp_conn) mxlnd_conn_decref(peer->mxp_conn); - mxlnd_peer_decref(peer); - } - } -} - -int -mxlnd_host_alloc(struct kmx_host **hostp) -{ - struct kmx_host *host = NULL; - - MXLND_ALLOC(host, sizeof (*host)); - if (host == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate host\n"); - return -1; - } - memset(host, 0, sizeof(*host)); - spin_lock_init(&host->mxh_lock); - - *hostp = host; - - return 0; -} - -void -mxlnd_host_free(struct kmx_host *host) -{ - if (host == NULL) return; - - if (host->mxh_hostname != NULL) - MXLND_FREE(host->mxh_hostname, strlen(host->mxh_hostname) + 1); - - MXLND_FREE(host, sizeof(*host)); - return; -} - -/** - * mxlnd_free_hosts - free kmx_hosts - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_hosts(void) -{ - struct kmx_host *host = NULL; - struct kmx_host *next = NULL; - - list_for_each_entry_safe(host, next, &kmxlnd_data.kmx_hosts, mxh_list) { - list_del_init(&host->mxh_list); - mxlnd_host_free(host); - } - return; -} - -#define xstr(s) #s -#define str(s) xstr(s) -#define MXLND_MAX_BOARD 4 /* we expect hosts to have fewer NICs than this */ -#define 
MXLND_MAX_EP_ID 16 /* we expect hosts to have less than this endpoints */ - -/* this parses a line that consists of: - * - * IP HOSTNAME BOARD ENDPOINT ID - * 169.192.0.113 mds01 0 3 - * - * By default MX uses the alias (short hostname). If you override - * it using mx_hostname to use the FQDN or some other name, the hostname - * here must match exactly. - */ - -/* MX_MAX_HOSTNAME_LEN = 80. See myriexpress.h */ -int -mxlnd_parse_line(char *line) -{ - int i = 0; - int ret = 0; - int len = 0; - u32 ip[4] = { 0, 0, 0, 0 }; - char hostname[MX_MAX_HOSTNAME_LEN]; - u32 board = -1; - u32 ep_id = -1; - struct kmx_host *host = NULL; - - if (line == NULL) return -1; - - len = strlen(line); - - if (len == 0) return -1; - - /* convert tabs to spaces */ - for (i = 0; i < len; i++) { - if (line[i] == '\t') line[i] = ' '; - } - - memset(&hostname, 0 , sizeof(hostname)); - ret = sscanf(line, "%d.%d.%d.%d %" str(MX_MAX_HOSTNAME_LEN) "s %d %d", - &ip[0], &ip[1], &ip[2], &ip[3], hostname, &board, &ep_id); - - if (ret != 7) { - return -1; - } - - /* check for valid values */ - /* we assume a valid IP address (all <= 255), number of NICs, - * and number of endpoint IDs */ - if (ip[0] > 255 || ip [1] > 255 || ip[2] > 255 || ip[3] > 255 || - board > MXLND_MAX_BOARD || ep_id > MXLND_MAX_EP_ID) { - CDEBUG(D_NETERROR, "Illegal value in \"%s\". 
Ignoring " - "this host.\n", line); - return -1; - } - - ret = mxlnd_host_alloc(&host); - if (ret != 0) return -1; - - host->mxh_addr = ((ip[0]<<24)|(ip[1]<<16)|(ip[2]<<8)|ip[3]); - len = strlen(hostname); - MXLND_ALLOC(host->mxh_hostname, len + 1); - if (host->mxh_hostname == NULL) { - mxlnd_host_free(host); - return -ENOMEM; - } - memset(host->mxh_hostname, 0, len + 1); - strncpy(host->mxh_hostname, hostname, len); - host->mxh_board = board; - host->mxh_ep_id = ep_id; - - spin_lock(&kmxlnd_data.kmx_hosts_lock); - list_add_tail(&host->mxh_list, &kmxlnd_data.kmx_hosts); - spin_unlock(&kmxlnd_data.kmx_hosts_lock); - - return 0; -} - -void -mxlnd_print_hosts(void) -{ -#if MXLND_DEBUG - struct kmx_host *host = NULL; - - list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) { - int ip[4]; - u32 addr = host->mxh_addr; - - ip[0] = (addr >> 24) & 0xff; - ip[1] = (addr >> 16) & 0xff; - ip[2] = (addr >> 8) & 0xff; - ip[3] = addr & 0xff; - CDEBUG(D_NET, "\tip= %d.%d.%d.%d\n\thost= %s\n\tboard= %d\n\tep_id= %d\n\n", - ip[0], ip[1], ip[2], ip[3], - host->mxh_hostname, host->mxh_board, host->mxh_ep_id); - } -#endif - return; -} - -#define MXLND_BUFSIZE (PAGE_SIZE - 1) - -int -mxlnd_parse_hosts(char *filename) -{ - int ret = 0; - s32 size = 0; - s32 bufsize = MXLND_BUFSIZE; - s32 allocd = 0; - loff_t offset = 0; - struct file *filp = NULL; - struct inode *inode = NULL; - char *buf = NULL; - s32 buf_off = 0; - char *sep = NULL; - char *line = NULL; - - if (filename == NULL) return -1; - - filp = filp_open(filename, O_RDONLY, 0); - if (IS_ERR(filp)) { - CERROR("filp_open() failed for %s\n", filename); - return -1; - } - - inode = filp->f_dentry->d_inode; - if (!S_ISREG(inode->i_mode)) { - CERROR("%s is not a regular file\n", filename); - return -1; - } - - size = (s32) inode->i_size; - if (size < MXLND_BUFSIZE) bufsize = size; - allocd = bufsize; - MXLND_ALLOC(buf, allocd + 1); - if (buf == NULL) { - CERROR("Cannot allocate buf\n"); - filp_close(filp, current->files); - 
return -1; - } - - while (offset < size) { - memset(buf, 0, bufsize + 1); - ret = kernel_read(filp, (unsigned long) offset, buf, (unsigned long) bufsize); - if (ret < 0) { - CDEBUG(D_NETERROR, "kernel_read() returned %d - closing %s\n", ret, filename); - filp_close(filp, current->files); - MXLND_FREE(buf, allocd + 1); - return -1; - } - - if (ret < bufsize) bufsize = ret; - buf_off = 0; - while (buf_off < bufsize) { - sep = strchr(buf + buf_off, '\n'); - if (sep != NULL) { - /* we have a line */ - line = buf + buf_off; - *sep = '\0'; - ret = mxlnd_parse_line(line); - if (ret != 0 && strlen(line) != 0) { - CDEBUG(D_NETERROR, "Failed to parse \"%s\". Ignoring this host.\n", line); - } - buf_off += strlen(line) + 1; - } else { - /* last line or we need to read more */ - line = buf + buf_off; - ret = mxlnd_parse_line(line); - if (ret != 0) { - bufsize -= strlen(line) + 1; - } - buf_off += strlen(line) + 1; - } - } - offset += bufsize; - bufsize = MXLND_BUFSIZE; - } - - MXLND_FREE(buf, allocd + 1); - filp_close(filp, current->files); - mxlnd_print_hosts(); - - return 0; -} - -/** - * mxlnd_init_mx - open the endpoint, set out ID, register the EAGER callback - * @ni - the network interface - * - * Returns 0 on success, -1 on failure - */ -int -mxlnd_init_mx(lnet_ni_t *ni) -{ - int ret = 0; - int found = 0; - mx_return_t mxret; - mx_endpoint_addr_t addr; - u32 board = *kmxlnd_tunables.kmx_board; - u32 ep_id = *kmxlnd_tunables.kmx_ep_id; - u64 nic_id = 0LL; - struct kmx_host *host = NULL; - - mxret = mx_init(); - if (mxret != MX_SUCCESS) { - CERROR("mx_init() failed with %s (%d)\n", mx_strerror(mxret), mxret); - return -1; - } - - ret = mxlnd_parse_hosts(*kmxlnd_tunables.kmx_hosts); - if (ret != 0) { - if (*kmxlnd_tunables.kmx_hosts != NULL) { - CERROR("mxlnd_parse_hosts(%s) failed\n", *kmxlnd_tunables.kmx_hosts); - } - mx_finalize(); - return -1; - } - - list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) { - if (strcmp(host->mxh_hostname, system_utsname.nodename) 
== 0) { - /* override the defaults and module parameters with - * the info from the hosts file */ - board = host->mxh_board; - ep_id = host->mxh_ep_id; - kmxlnd_data.kmx_localhost = host; - CDEBUG(D_NET, "my hostname is %s board %d ep_id %d\n", kmxlnd_data.kmx_localhost->mxh_hostname, kmxlnd_data.kmx_localhost->mxh_board, kmxlnd_data.kmx_localhost->mxh_ep_id); - found = 1; - break; - } - } - - if (found == 0) { - CERROR("no host entry found for localhost\n"); - mx_finalize(); - return -1; - } - - mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC, - NULL, 0, &kmxlnd_data.kmx_endpt); - if (mxret != MX_SUCCESS) { - CERROR("mx_open_endpoint() failed with %d\n", mxret); - mx_finalize(); - return -1; - } - - mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &addr); - mx_decompose_endpoint_addr(addr, &nic_id, &ep_id); - - LASSERT(host != NULL); - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), host->mxh_addr); - - CDEBUG(D_NET, "My NID is 0x%llx\n", ni->ni_nid); - - /* this will catch all unexpected receives. 
*/ - mxret = mx_register_unexp_handler(kmxlnd_data.kmx_endpt, - (mx_unexp_handler_t) mxlnd_unexpected_recv, - NULL); - if (mxret != MX_SUCCESS) { - CERROR("mx_register_unexp_callback() failed with %s\n", - mx_strerror(mxret)); - mx_close_endpoint(kmxlnd_data.kmx_endpt); - mx_finalize(); - return -1; - } - mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL, MXLND_COMM_TIMEOUT/HZ*1000); - if (mxret != MX_SUCCESS) { - CERROR("mx_set_request_timeout() failed with %s\n", - mx_strerror(mxret)); - mx_close_endpoint(kmxlnd_data.kmx_endpt); - mx_finalize(); - return -1; - } - return 0; -} - - -/** - * mxlnd_thread_start - spawn a kernel thread with this function - * @fn - function pointer - * @arg - pointer to the parameter data - * - * Returns 0 on success and a negative value on failure - */ -int -mxlnd_thread_start(int (*fn)(void *arg), void *arg) -{ - int pid = 0; - int i = (int) ((long) arg); - - atomic_inc(&kmxlnd_data.kmx_nthreads); - init_completion(&kmxlnd_data.kmx_completions[i]); - - pid = kernel_thread (fn, arg, 0); - if (pid < 0) { - CERROR("kernel_thread() failed with %d\n", pid); - atomic_dec(&kmxlnd_data.kmx_nthreads); - } - return pid; -} - -/** - * mxlnd_thread_stop - decrement thread counter - * - * The thread returns 0 when it detects shutdown. - * We are simply decrementing the thread counter. - */ -void -mxlnd_thread_stop(long id) -{ - int i = (int) id; - atomic_dec (&kmxlnd_data.kmx_nthreads); - complete(&kmxlnd_data.kmx_completions[i]); -} - -/** - * mxlnd_shutdown - stop IO, clean up state - * @ni - LNET interface handle - * - * No calls to the LND should be made after calling this function. 
- */ -void -mxlnd_shutdown (lnet_ni_t *ni) -{ - int i = 0; - int nthreads = 2 + *kmxlnd_tunables.kmx_n_waitd; - - LASSERT (ni == kmxlnd_data.kmx_ni); - LASSERT (ni->ni_data == &kmxlnd_data); - CDEBUG(D_NET, "in shutdown()\n"); - - CDEBUG(D_MALLOC, "before MXLND cleanup: libcfs_kmemory %d " - "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), - kmxlnd_data.kmx_mem_used); - - switch (kmxlnd_data.kmx_init) { - - case MXLND_INIT_ALL: - - CDEBUG(D_NET, "setting shutdown = 1\n"); - /* set shutdown and wakeup request_waitds */ - kmxlnd_data.kmx_shutdown = 1; - mb(); - mx_wakeup(kmxlnd_data.kmx_endpt); - up(&kmxlnd_data.kmx_tx_queue_sem); - mxlnd_sleep(2 * HZ); - - /* fall through */ - - case MXLND_INIT_THREADS: - - CDEBUG(D_NET, "waiting on threads\n"); - /* wait for threads to complete */ - for (i = 0; i < nthreads; i++) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - - CDEBUG(D_NET, "freeing completions\n"); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - - /* fall through */ - - case MXLND_INIT_MX: - - CDEBUG(D_NET, "stopping mx\n"); - - /* wakeup waiters if they missed the above. - * close endpoint to stop all traffic. - * this will cancel and cleanup all requests, etc. 
*/ - - mx_wakeup(kmxlnd_data.kmx_endpt); - mx_close_endpoint(kmxlnd_data.kmx_endpt); - mx_finalize(); - - CDEBUG(D_NET, "mxlnd_free_hosts();\n"); - mxlnd_free_hosts(); - - /* fall through */ - - case MXLND_INIT_RXS: - - CDEBUG(D_NET, "freeing rxs\n"); - - /* free all rxs and associated pages */ - mxlnd_free_rxs(); - - /* fall through */ - - case MXLND_INIT_TXS: - - CDEBUG(D_NET, "freeing txs\n"); - - /* free all txs and associated pages */ - mxlnd_free_txs(); - - /* fall through */ - - case MXLND_INIT_DATA: - - CDEBUG(D_NET, "freeing peers\n"); - - /* free peer list */ - mxlnd_free_peers(); - - /* fall through */ - - case MXLND_INIT_NOTHING: - break; - } - CDEBUG(D_NET, "shutdown complete\n"); - - CDEBUG(D_MALLOC, "after MXLND cleanup: libcfs_kmemory %d " - "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), - kmxlnd_data.kmx_mem_used); - - kmxlnd_data.kmx_init = MXLND_INIT_NOTHING; - PORTAL_MODULE_UNUSE; - return; -} - -/** - * mxlnd_startup - initialize state, open an endpoint, start IO - * @ni - LNET interface handle - * - * Initialize state, open an endpoint, start monitoring threads. - * Should only be called once. 
- */ -int -mxlnd_startup (lnet_ni_t *ni) -{ - int i = 0; - int ret = 0; - int nthreads = 2; /* for timeoutd and tx_queued */ - struct timeval tv; - - LASSERT (ni->ni_lnd == &the_kmxlnd); - - if (kmxlnd_data.kmx_init != MXLND_INIT_NOTHING) { - CERROR("Only 1 instance supported\n"); - return -EPERM; - } - CDEBUG(D_MALLOC, "before MXLND startup: libcfs_kmemory %d " - "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), - kmxlnd_data.kmx_mem_used); - - /* reserve 1/2 of tx for connect request messages */ - ni->ni_maxtxcredits = *kmxlnd_tunables.kmx_ntx / 2; - ni->ni_peertxcredits = *kmxlnd_tunables.kmx_credits; - if (ni->ni_maxtxcredits < ni->ni_peertxcredits) - ni->ni_maxtxcredits = ni->ni_peertxcredits; - - PORTAL_MODULE_USE; - memset (&kmxlnd_data, 0, sizeof (kmxlnd_data)); - - kmxlnd_data.kmx_ni = ni; - ni->ni_data = &kmxlnd_data; - - do_gettimeofday(&tv); - kmxlnd_data.kmx_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - CDEBUG(D_NET, "my incarnation is %lld\n", kmxlnd_data.kmx_incarnation); - - spin_lock_init (&kmxlnd_data.kmx_global_lock); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_req); - spin_lock_init (&kmxlnd_data.kmx_conn_lock); - sema_init(&kmxlnd_data.kmx_conn_sem, 0); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_hosts); - spin_lock_init (&kmxlnd_data.kmx_hosts_lock); - - for (i = 0; i < MXLND_HASH_SIZE; i++) { - INIT_LIST_HEAD (&kmxlnd_data.kmx_peers[i]); - } - rwlock_init (&kmxlnd_data.kmx_peers_lock); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_txs); - INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_idle); - spin_lock_init (&kmxlnd_data.kmx_tx_idle_lock); - kmxlnd_data.kmx_tx_next_cookie = 1; - INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_queue); - spin_lock_init (&kmxlnd_data.kmx_tx_queue_lock); - sema_init(&kmxlnd_data.kmx_tx_queue_sem, 0); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_rxs); - spin_lock_init (&kmxlnd_data.kmx_rxs_lock); - INIT_LIST_HEAD (&kmxlnd_data.kmx_rx_idle); - spin_lock_init (&kmxlnd_data.kmx_rx_idle_lock); - - kmxlnd_data.kmx_init = MXLND_INIT_DATA; - 
/*****************************************************/ - - ret = mxlnd_init_txs(); - if (ret != 0) { - CERROR("Can't alloc tx descs: %d\n", ret); - goto failed; - } - kmxlnd_data.kmx_init = MXLND_INIT_TXS; - /*****************************************************/ - - ret = mxlnd_init_rxs(); - if (ret != 0) { - CERROR("Can't alloc rx descs: %d\n", ret); - goto failed; - } - kmxlnd_data.kmx_init = MXLND_INIT_RXS; - /*****************************************************/ - - ret = mxlnd_init_mx(ni); - if (ret != 0) { - CERROR("Can't init mx\n"); - goto failed; - } - - kmxlnd_data.kmx_init = MXLND_INIT_MX; - /*****************************************************/ - - /* start threads */ - - nthreads += *kmxlnd_tunables.kmx_n_waitd; - MXLND_ALLOC (kmxlnd_data.kmx_completions, - nthreads * sizeof(struct completion)); - if (kmxlnd_data.kmx_completions == NULL) { - CERROR("failed to alloc kmxlnd_data.kmx_completions\n"); - goto failed; - } - memset(kmxlnd_data.kmx_completions, 0, - nthreads * sizeof(struct completion)); - - { - CDEBUG(D_NET, "using %d %s in mx_wait_any()\n", - *kmxlnd_tunables.kmx_n_waitd, - *kmxlnd_tunables.kmx_n_waitd == 1 ? 
"thread" : "threads"); - - for (i = 0; i < *kmxlnd_tunables.kmx_n_waitd; i++) { - ret = mxlnd_thread_start(mxlnd_request_waitd, (void*)((long)i)); - if (ret < 0) { - CERROR("Starting mxlnd_request_waitd[%d] failed with %d\n", i, ret); - kmxlnd_data.kmx_shutdown = 1; - mx_wakeup(kmxlnd_data.kmx_endpt); - for (--i; i >= 0; i--) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - - goto failed; - } - } - ret = mxlnd_thread_start(mxlnd_tx_queued, (void*)((long)i++)); - if (ret < 0) { - CERROR("Starting mxlnd_tx_queued failed with %d\n", ret); - kmxlnd_data.kmx_shutdown = 1; - mx_wakeup(kmxlnd_data.kmx_endpt); - for (--i; i >= 0; i--) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - goto failed; - } - ret = mxlnd_thread_start(mxlnd_timeoutd, (void*)((long)i++)); - if (ret < 0) { - CERROR("Starting mxlnd_timeoutd failed with %d\n", ret); - kmxlnd_data.kmx_shutdown = 1; - mx_wakeup(kmxlnd_data.kmx_endpt); - up(&kmxlnd_data.kmx_tx_queue_sem); - for (--i; i >= 0; i--) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - goto failed; - } - } - - kmxlnd_data.kmx_init = MXLND_INIT_THREADS; - /*****************************************************/ - - kmxlnd_data.kmx_init = MXLND_INIT_ALL; - CDEBUG(D_MALLOC, "startup complete (kmx_mem_used %ld)\n", kmxlnd_data.kmx_mem_used); - - return 0; -failed: - CERROR("mxlnd_startup failed\n"); - mxlnd_shutdown(ni); - return (-ENETDOWN); -} - -static int mxlnd_init(void) -{ - lnet_register_lnd(&the_kmxlnd); - return 0; -} - -static void mxlnd_exit(void) -{ - 
lnet_unregister_lnd(&the_kmxlnd); - return; -} - -module_init(mxlnd_init); -module_exit(mxlnd_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Myricom, Inc. - help@myri.com"); -MODULE_DESCRIPTION("Kernel MyrinetExpress LND"); -MODULE_VERSION("0.5.0"); diff --git a/lnet/klnds/mxlnd/mxlnd.h b/lnet/klnds/mxlnd/mxlnd.h deleted file mode 100644 index 407190322253507bd5c7f82c3cbee6018f6e7b44..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd.h +++ /dev/null @@ -1,419 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -#define EXPORT_SYMTAB -#endif -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> /* module */ -#include <linux/kernel.h> /* module */ -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> -#include <linux/fs.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> /* module */ -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/utsname.h> - -#include <net/sock.h> -#include <linux/in.h> - -#include <linux/netdevice.h> /* these are needed for ARP */ -#include <linux/if_arp.h> -#include <net/arp.h> -#include <linux/inetdevice.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include "libcfs/kp30.h" -#include "lnet/lnet.h" -#include "lnet/lib-lnet.h" - -#define MX_KERNEL 1 -#include "mx_extensions.h" -#include "myriexpress.h" - -#if LNET_MAX_IOV > MX_MAX_SEGMENTS - #error LNET_MAX_IOV is greater then MX_MAX_SEGMENTS -#endif - -/* Using MX's 64 match bits - * We are using the match bits to specify message type and the cookie. The - * highest four bits (60-63) are reserved for message type. Below we specify - * the types. MXLND_MASK_ICON_REQ and MXLND_MASK_ICON_ACK are used for - * mx_iconnect(). We reserve the remaining combinations for future use. The - * next 8 bits (52-59) are reserved for returning a status code for failed - * GET_DATA (payload) messages. The last 52 bits are used for cookies. That - * should allow unique cookies for 4 KB messages at 10 Gbps line rate without - * rollover for about 8 years. That should be enough. 
*/ - -/* constants */ -#define MXLND_MASK_ICON_REQ (0xBLL << 60) /* it is a mx_iconnect() completion */ -#define MXLND_MASK_CONN_REQ (0xCLL << 60) /* CONN_REQ msg */ -#define MXLND_MASK_ICON_ACK (0x9LL << 60) /* it is a mx_iconnect() completion */ -#define MXLND_MASK_CONN_ACK (0xALL << 60) /* CONN_ACK msg*/ -#define MXLND_MASK_EAGER (0xELL << 60) /* EAGER msg */ -#define MXLND_MASK_NOOP (0x1LL << 60) /* NOOP msg */ -#define MXLND_MASK_PUT_REQ (0x2LL << 60) /* PUT_REQ msg */ -#define MXLND_MASK_PUT_ACK (0x3LL << 60) /* PUT_ACK msg */ -#define MXLND_MASK_PUT_DATA (0x4LL << 60) /* PUT_DATA msg */ -#define MXLND_MASK_GET_REQ (0x5LL << 60) /* GET_REQ msg */ -#define MXLND_MASK_GET_DATA (0x6LL << 60) /* GET_DATA msg */ -//#define MXLND_MASK_NAK (0x7LL << 60) /* NAK msg */ - -#define MXLND_MAX_COOKIE ((1LL << 52) - 1) /* when to roll-over the cookie value */ -#define MXLND_NCOMPLETIONS (MXLND_N_SCHED + 2) /* max threads for completion array */ - -/* defaults for configurable parameters */ -#define MXLND_N_SCHED 1 /* # schedulers (mx_wait_any() threads) */ -#define MXLND_MX_BOARD 0 /* Use the first MX NIC if more than 1 avail */ -#define MXLND_MX_EP_ID 3 /* MX endpoint ID */ -#define MXLND_COMM_TIMEOUT (20 * HZ) /* timeout for send/recv (jiffies) */ -#define MXLND_WAIT_TIMEOUT HZ /* timeout for wait (jiffies) */ -#define MXLND_POLLING 1000 /* poll iterations before blocking */ -#define MXLND_MAX_PEERS 1024 /* number of nodes talking to me */ -#define MXLND_EAGER_NUM MXLND_MAX_PEERS /* number of pre-posted receives */ -#define MXLND_EAGER_SIZE PAGE_SIZE /* pre-posted eager message size */ -#define MXLND_MSG_QUEUE_DEPTH 8 /* msg queue depth */ -#define MXLND_CREDIT_HIGHWATER (MXLND_MSG_QUEUE_DEPTH - 2) - /* when to send a noop to return credits */ -#define MXLND_NTX 256 /* # of kmx_tx - total sends in flight - 1/2 are reserved for connect messages */ - -#define MXLND_HASH_BITS 6 /* the number of bits to hash over */ -#define MXLND_HASH_SIZE (1<<MXLND_HASH_BITS) - /* number 
of peer lists for lookup. - we hash over the last N bits of - the IP address converted to an int. */ -#define MXLND_HASH_MASK (MXLND_HASH_SIZE - 1) - /* ensure we use only the last N bits */ - -/* debugging features */ -#define MXLND_CKSUM 0 /* checksum kmx_msg_t */ -#define MXLND_DEBUG 0 /* turn on printk()s */ - -extern inline void mxlnd_noop(char *s, ...); -#if MXLND_DEBUG - #define MXLND_PRINT printk -#else - #define MXLND_PRINT mxlnd_noop -#endif - -/* provide wrappers around LIBCFS_ALLOC/FREE to keep MXLND specific - * memory usage stats that include pages */ - -#define MXLND_ALLOC(x, size) \ - do { \ - spin_lock(&kmxlnd_data.kmx_global_lock); \ - kmxlnd_data.kmx_mem_used += size; \ - spin_unlock(&kmxlnd_data.kmx_global_lock); \ - LIBCFS_ALLOC(x, size); \ - if (x == NULL) { \ - spin_lock(&kmxlnd_data.kmx_global_lock); \ - kmxlnd_data.kmx_mem_used -= size; \ - spin_unlock(&kmxlnd_data.kmx_global_lock); \ - } \ - } while (0) - -#define MXLND_FREE(x, size) \ - do { \ - spin_lock(&kmxlnd_data.kmx_global_lock); \ - kmxlnd_data.kmx_mem_used -= size; \ - spin_unlock(&kmxlnd_data.kmx_global_lock); \ - LIBCFS_FREE(x, size); \ - } while (0) - - -typedef struct kmx_tunables { - int *kmx_n_waitd; /* # completion threads */ - int *kmx_max_peers; /* max # of potential peers */ - int *kmx_cksum; /* checksum small msgs? */ - int *kmx_ntx; /* total # of tx (1/2 for LNET 1/2 for CONN_REQ */ - int *kmx_credits; /* concurrent sends to 1 peer */ - int *kmx_board; /* MX board (NIC) number */ - int *kmx_ep_id; /* MX endpoint number */ - int *kmx_polling; /* if 0, block. 
if > 0, poll this many - iterations before blocking */ - char **kmx_hosts; /* Location of hosts file, if used */ -} kmx_tunables_t; - -/* structure to hold IP-to-hostname resolution data */ -struct kmx_host { - struct kmx_peer *mxh_peer; /* pointer to matching peer */ - u32 mxh_addr; /* IP address as int */ - char *mxh_hostname; /* peer's hostname */ - u32 mxh_board; /* peer's board rank */ - u32 mxh_ep_id; /* peer's MX endpoint ID */ - struct list_head mxh_list; /* position on kmx_hosts */ - spinlock_t mxh_lock; /* lock */ -}; - -/* global interface state */ -typedef struct kmx_data -{ - int kmx_init; /* initialization state */ - int kmx_shutdown; /* shutting down? */ - atomic_t kmx_nthreads; /* number of threads */ - struct completion *kmx_completions; /* array of completion structs */ - lnet_ni_t *kmx_ni; /* the LND instance */ - u64 kmx_incarnation; /* my incarnation value - unused */ - long kmx_mem_used; /* memory used */ - struct kmx_host *kmx_localhost; /* pointer to my kmx_host info */ - mx_endpoint_t kmx_endpt; /* the MX endpoint */ - - spinlock_t kmx_global_lock; /* global lock */ - - struct list_head kmx_conn_req; /* list of connection requests */ - spinlock_t kmx_conn_lock; /* connection list lock */ - struct semaphore kmx_conn_sem; /* semaphore for connection request list */ - - struct list_head kmx_hosts; /* host lookup info */ - spinlock_t kmx_hosts_lock; /* hosts list lock */ - - struct list_head kmx_peers[MXLND_HASH_SIZE]; - /* list of all known peers */ - rwlock_t kmx_peers_lock; /* peer list rw lock */ - atomic_t kmx_npeers; /* number of peers */ - - struct list_head kmx_txs; /* all tx descriptors */ - struct list_head kmx_tx_idle; /* list of idle tx */ - spinlock_t kmx_tx_idle_lock; /* lock for idle tx list */ - s32 kmx_tx_used; /* txs in use */ - u64 kmx_tx_next_cookie; /* unique id for tx */ - struct list_head kmx_tx_queue; /* generic send queue */ - spinlock_t kmx_tx_queue_lock; /* lock for generic sends */ - struct semaphore 
kmx_tx_queue_sem; /* semaphore for tx queue */ - - struct list_head kmx_rxs; /* all rx descriptors */ - spinlock_t kmx_rxs_lock; /* lock for rxs list */ - struct list_head kmx_rx_idle; /* list of idle tx */ - spinlock_t kmx_rx_idle_lock; /* lock for idle rx list */ -} kmx_data_t; - -#define MXLND_INIT_NOTHING 0 /* in the beginning, there was nothing... */ -#define MXLND_INIT_DATA 1 /* main data structures created */ -#define MXLND_INIT_TXS 2 /* tx descriptors created */ -#define MXLND_INIT_RXS 3 /* initial rx descriptors created */ -#define MXLND_INIT_MX 4 /* initiate MX library, open endpoint, get NIC id */ -#define MXLND_INIT_THREADS 5 /* waitd, timeoutd, tx_queued threads */ -#define MXLND_INIT_ALL 6 /* startup completed */ - -#include "mxlnd_wire.h" - -enum kmx_req_type { - MXLND_REQ_TX = 0, - MXLND_REQ_RX = 1, -}; - -/* The life cycle of a request */ -enum kmx_req_state { - MXLND_CTX_INIT = 0, /* just created */ - MXLND_CTX_IDLE = 1, /* available for use */ - MXLND_CTX_PREP = 2, /* getting ready for send/recv */ - MXLND_CTX_PENDING = 3, /* mx_isend() or mx_irecv() called */ - MXLND_CTX_COMPLETED = 4, /* cleaning up after completion or timeout */ - MXLND_CTX_CANCELED = 5, /* timed out but still in ctx list */ -}; - -/* Context Structure - generic tx/rx descriptor - * It represents the context (or state) of each send or receive request. - * In other LNDs, they have separate TX and RX descriptors and this replaces both. - * - * We will keep the these on the global kmx_rxs and kmx_txs lists for cleanup - * during shutdown(). We will move them between the rx/tx idle lists and the - * pending list which is monitored by mxlnd_timeoutd(). 
- */ -struct kmx_ctx { - enum kmx_req_type mxc_type; /* TX or RX */ - u64 mxc_incarnation; /* store the peer's incarnation here - to verify before changing flow - control credits after completion */ - unsigned long mxc_deadline; /* request time out in absolute jiffies */ - enum kmx_req_state mxc_state; /* what is the state of the request? */ - struct list_head mxc_global_list; /* place on kmx_rxs or kmx_txs */ - struct list_head mxc_list; /* place on rx/tx idle list, tx q, peer tx */ - struct list_head mxc_rx_list; /* place on mxp_rx_posted list */ - spinlock_t mxc_lock; /* lock */ - - lnet_nid_t mxc_nid; /* dst's NID if peer is not known */ - struct kmx_peer *mxc_peer; /* owning peer */ - struct kmx_conn *mxc_conn; /* owning conn */ - struct kmx_msg *mxc_msg; /* msg hdr mapped to mxc_page */ - struct page *mxc_page; /* buffer for eager msgs */ - lnet_msg_t *mxc_lntmsg[2]; /* lnet msgs to finalize */ - - u8 mxc_msg_type; /* what type of message is this? */ - u64 mxc_cookie; /* completion cookie */ - u64 mxc_match; /* MX match info */ - mx_ksegment_t mxc_seg; /* local MX ksegment for non-DATA */ - mx_ksegment_t *mxc_seg_list; /* MX ksegment array for DATA */ - int mxc_nseg; /* number of segments */ - unsigned long mxc_pin_type; /* MX_PIN_KERNEL or MX_PIN_PHYSICAL */ - u32 mxc_nob; /* number of bytes sent/received */ - mx_request_t mxc_mxreq; /* MX request */ - mx_status_t mxc_status; /* MX status */ - s64 mxc_get; /* # of times returned from idle list */ - s64 mxc_put; /* # of times returned from idle list */ -}; - -#define MXLND_CONN_DISCONNECT -2 /* conn is being destroyed - do not add txs */ -#define MXLND_CONN_FAIL -1 /* connect failed (bad handshake, unavail, etc.) */ -#define MXLND_CONN_INIT 0 /* in the beginning, there was nothing... 
*/ -#define MXLND_CONN_REQ 1 /* a connection request message is needed */ -#define MXLND_CONN_ACK 2 /* a connection ack is needed */ -#define MXLND_CONN_WAIT 3 /* waiting for req or ack to complete */ -#define MXLND_CONN_READY 4 /* ready to send */ - -/* connection state - queues for queued and pending msgs */ -struct kmx_conn -{ - u64 mxk_incarnation; /* connections's incarnation value */ - atomic_t mxk_refcount; /* reference counting */ - - struct kmx_peer *mxk_peer; /* owning peer */ - mx_endpoint_addr_t mxk_epa; /* peer's endpoint address */ - - struct list_head mxk_list; /* for placing on mxp_conns */ - spinlock_t mxk_lock; /* lock */ - unsigned long mxk_timeout; /* expiration of oldest pending tx/rx */ - unsigned long mxk_last_tx; /* when last tx completed with success */ - unsigned long mxk_last_rx; /* when last rx completed */ - - int mxk_credits; /* # of my credits for sending to peer */ - int mxk_outstanding; /* # of credits to return */ - - int mxk_status; /* can we send messages? 
MXLND_CONN_* */ - struct list_head mxk_tx_credit_queue; /* send queue for peer */ - struct list_head mxk_tx_free_queue; /* send queue for peer */ - int mxk_ntx_msgs; /* # of msgs on tx queues */ - int mxk_ntx_data ; /* # of DATA on tx queues */ - int mxk_ntx_posted; /* # of tx msgs in flight */ - int mxk_data_posted; /* # of tx data payloads in flight */ - - struct list_head mxk_pending; /* in flight rxs and txs */ -}; - -/* peer state */ -struct kmx_peer -{ - lnet_nid_t mxp_nid; /* peer's LNET NID */ - u64 mxp_incarnation; /* peer's incarnation value */ - atomic_t mxp_refcount; /* reference counts */ - - struct kmx_host *mxp_host; /* peer lookup info */ - u64 mxp_nic_id; /* remote's MX nic_id for mx_connect() */ - - struct list_head mxp_peers; /* for placing on kmx_peers */ - spinlock_t mxp_lock; /* lock */ - - struct list_head mxp_conns; /* list of connections */ - struct kmx_conn *mxp_conn; /* current connection */ - - unsigned long mxp_reconnect_time; /* when to retry connect */ - int mxp_incompatible; /* incorrect conn_req values */ -}; - -extern kmx_data_t kmxlnd_data; -extern kmx_tunables_t kmxlnd_tunables; - -/* required for the LNET API */ -int mxlnd_startup(lnet_ni_t *ni); -void mxlnd_shutdown(lnet_ni_t *ni); -int mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int mxlnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); - -/* in mxlnd.c */ -extern void mxlnd_thread_stop(long id); -extern int mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type); -extern void mxlnd_ctx_free(struct kmx_ctx *ctx); -extern void mxlnd_ctx_init(struct kmx_ctx *ctx); -extern lnet_nid_t mxlnd_nic_id2nid(lnet_ni_t *ni, u64 nic_id); -extern u64 mxlnd_nid2nic_id(lnet_nid_t nid); - -/* in mxlnd_cb.c */ -void mxlnd_eager_recv(void *context, uint64_t match_value, 
uint32_t length); -extern mx_unexp_handler_action_t mxlnd_unexpected_recv(void *context, - mx_endpoint_addr_t source, uint64_t match_value, uint32_t length, - void *data_if_available); -extern void mxlnd_peer_free(struct kmx_peer *peer); -extern void mxlnd_conn_free(struct kmx_conn *conn); -extern void mxlnd_sleep(unsigned long timeout); -extern int mxlnd_tx_queued(void *arg); -extern void mxlnd_handle_rx_completion(struct kmx_ctx *rx); -extern int mxlnd_check_sends(struct kmx_peer *peer); -extern int mxlnd_tx_peer_queued(void *arg); -extern int mxlnd_request_waitd(void *arg); -extern int mxlnd_unex_recvd(void *arg); -extern int mxlnd_timeoutd(void *arg); -extern int mxlnd_connd(void *arg); - -#define mxlnd_peer_addref(peer) \ -do { \ - LASSERT(peer != NULL); \ - LASSERT(atomic_read(&(peer)->mxp_refcount) > 0); \ - atomic_inc(&(peer)->mxp_refcount); \ -} while (0) - - -#define mxlnd_peer_decref(peer) \ -do { \ - LASSERT(atomic_read(&(peer)->mxp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->mxp_refcount)) \ - mxlnd_peer_free(peer); \ -} while (0) - -#define mxlnd_conn_addref(conn) \ -do { \ - LASSERT(conn != NULL); \ - LASSERT(atomic_read(&(conn)->mxk_refcount) > 0); \ - atomic_inc(&(conn)->mxk_refcount); \ -} while (0) - - -#define mxlnd_conn_decref(conn) \ -do { \ - LASSERT(atomic_read(&(conn)->mxk_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->mxk_refcount)) \ - mxlnd_conn_free(conn); \ -} while (0) diff --git a/lnet/klnds/mxlnd/mxlnd_cb.c b/lnet/klnds/mxlnd/mxlnd_cb.c deleted file mode 100644 index 535f547e9c8984d8069ebf92d1d41777ba3e5022..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd_cb.c +++ /dev/null @@ -1,3599 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Myricom, Inc. 
<help at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "mxlnd.h" - -inline void mxlnd_noop(char *s, ...) -{ - return; -} - -char * -mxlnd_ctxstate_to_str(int mxc_state) -{ - switch (mxc_state) { - case MXLND_CTX_INIT: - return "MXLND_CTX_INIT"; - case MXLND_CTX_IDLE: - return "MXLND_CTX_IDLE"; - case MXLND_CTX_PREP: - return "MXLND_CTX_PREP"; - case MXLND_CTX_PENDING: - return "MXLND_CTX_PENDING"; - case MXLND_CTX_COMPLETED: - return "MXLND_CTX_COMPLETED"; - case MXLND_CTX_CANCELED: - return "MXLND_CTX_CANCELED"; - default: - return "*unknown*"; - } -} - -char * -mxlnd_connstatus_to_str(int mxk_status) -{ - switch (mxk_status) { - case MXLND_CONN_READY: - return "MXLND_CONN_READY"; - case MXLND_CONN_INIT: - return "MXLND_CONN_INIT"; - case MXLND_CONN_REQ: - return "MXLND_CONN_REQ"; - case MXLND_CONN_ACK: - return "MXLND_CONN_ACK"; - case MXLND_CONN_WAIT: - return "MXLND_CONN_WAIT"; - case MXLND_CONN_DISCONNECT: - return "MXLND_CONN_DISCONNECT"; - case MXLND_CONN_FAIL: - return "MXLND_CONN_FAIL"; - default: - return "unknown"; - } -} - -char * -mxlnd_msgtype_to_str(int type) { - switch (type) { - case MXLND_MSG_EAGER: - return "MXLND_MSG_EAGER"; - case MXLND_MSG_CONN_REQ: - return "MXLND_MSG_CONN_REQ"; - case MXLND_MSG_CONN_ACK: - return "MXLND_MSG_CONN_ACK"; - case MXLND_MSG_NOOP: - return 
"MXLND_MSG_NOOP"; - case MXLND_MSG_PUT_REQ: - return "MXLND_MSG_PUT_REQ"; - case MXLND_MSG_PUT_ACK: - return "MXLND_MSG_PUT_ACK"; - case MXLND_MSG_PUT_DATA: - return "MXLND_MSG_PUT_DATA"; - case MXLND_MSG_GET_REQ: - return "MXLND_MSG_GET_REQ"; - case MXLND_MSG_GET_DATA: - return "MXLND_MSG_GET_DATA"; - default: - return "unknown"; - } -} - -char * -mxlnd_lnetmsg_to_str(int type) -{ - switch (type) { - case LNET_MSG_ACK: - return "LNET_MSG_ACK"; - case LNET_MSG_PUT: - return "LNET_MSG_PUT"; - case LNET_MSG_GET: - return "LNET_MSG_GET"; - case LNET_MSG_REPLY: - return "LNET_MSG_REPLY"; - case LNET_MSG_HELLO: - return "LNET_MSG_HELLO"; - default: - LBUG(); - return "*unknown*"; - } -} - -static inline u64 -//mxlnd_create_match(u8 msg_type, u8 error, u64 cookie) -mxlnd_create_match(struct kmx_ctx *ctx, u8 error) -{ - u64 type = (u64) ctx->mxc_msg_type; - u64 err = (u64) error; - u64 match = 0LL; - - LASSERT(ctx->mxc_msg_type != 0); - LASSERT(ctx->mxc_cookie >> 52 == 0); - match = (type << 60) | (err << 52) | ctx->mxc_cookie; - return match; -} - -static inline void -mxlnd_parse_match(u64 match, u8 *msg_type, u8 *error, u64 *cookie) -{ - *msg_type = (u8) (match >> 60); - *error = (u8) ((match >> 52) & 0xFF); - *cookie = match & 0xFFFFFFFFFFFFFLL; - LASSERT(match == (MXLND_MASK_ICON_REQ & 0xF000000000000000LL) || - match == (MXLND_MASK_ICON_ACK & 0xF000000000000000LL) || - *msg_type == MXLND_MSG_EAGER || - *msg_type == MXLND_MSG_CONN_REQ || - *msg_type == MXLND_MSG_CONN_ACK || - *msg_type == MXLND_MSG_NOOP || - *msg_type == MXLND_MSG_PUT_REQ || - *msg_type == MXLND_MSG_PUT_ACK || - *msg_type == MXLND_MSG_PUT_DATA || - *msg_type == MXLND_MSG_GET_REQ || - *msg_type == MXLND_MSG_GET_DATA); - return; -} - -struct kmx_ctx * -mxlnd_get_idle_rx(void) -{ - struct list_head *tmp = NULL; - struct kmx_ctx *rx = NULL; - - spin_lock(&kmxlnd_data.kmx_rx_idle_lock); - - if (list_empty (&kmxlnd_data.kmx_rx_idle)) { - spin_unlock(&kmxlnd_data.kmx_rx_idle_lock); - return NULL; - } - - tmp 
= &kmxlnd_data.kmx_rx_idle; - rx = list_entry (tmp->next, struct kmx_ctx, mxc_list); - list_del_init(&rx->mxc_list); - spin_unlock(&kmxlnd_data.kmx_rx_idle_lock); - -#if MXLND_DEBUG - if (rx->mxc_get != rx->mxc_put) { - CDEBUG(D_NETERROR, "*** RX get (%lld) != put (%lld) ***\n", rx->mxc_get, rx->mxc_put); - CDEBUG(D_NETERROR, "*** incarnation= %lld ***\n", rx->mxc_incarnation); - CDEBUG(D_NETERROR, "*** deadline= %ld ***\n", rx->mxc_deadline); - CDEBUG(D_NETERROR, "*** state= %s ***\n", mxlnd_ctxstate_to_str(rx->mxc_state)); - CDEBUG(D_NETERROR, "*** listed?= %d ***\n", !list_empty(&rx->mxc_list)); - CDEBUG(D_NETERROR, "*** nid= 0x%llx ***\n", rx->mxc_nid); - CDEBUG(D_NETERROR, "*** peer= 0x%p ***\n", rx->mxc_peer); - CDEBUG(D_NETERROR, "*** msg_type= %s ***\n", mxlnd_msgtype_to_str(rx->mxc_msg_type)); - CDEBUG(D_NETERROR, "*** cookie= 0x%llx ***\n", rx->mxc_cookie); - CDEBUG(D_NETERROR, "*** nob= %d ***\n", rx->mxc_nob); - } -#endif - LASSERT (rx->mxc_get == rx->mxc_put); - - rx->mxc_get++; - - LASSERT (rx->mxc_state == MXLND_CTX_IDLE); - rx->mxc_state = MXLND_CTX_PREP; - - return rx; -} - -int -mxlnd_put_idle_rx(struct kmx_ctx *rx) -{ - if (rx == NULL) { - CDEBUG(D_NETERROR, "called with NULL pointer\n"); - return -EINVAL; - } else if (rx->mxc_type != MXLND_REQ_RX) { - CDEBUG(D_NETERROR, "called with tx\n"); - return -EINVAL; - } - LASSERT(rx->mxc_get == rx->mxc_put + 1); - mxlnd_ctx_init(rx); - rx->mxc_put++; - spin_lock(&kmxlnd_data.kmx_rx_idle_lock); - list_add_tail(&rx->mxc_list, &kmxlnd_data.kmx_rx_idle); - spin_unlock(&kmxlnd_data.kmx_rx_idle_lock); - return 0; -} - -int -mxlnd_reduce_idle_rxs(__u32 count) -{ - __u32 i = 0; - struct kmx_ctx *rx = NULL; - - spin_lock(&kmxlnd_data.kmx_rxs_lock); - for (i = 0; i < count; i++) { - rx = mxlnd_get_idle_rx(); - if (rx != NULL) { - struct list_head *tmp = &rx->mxc_global_list; - list_del_init(tmp); - mxlnd_ctx_free(rx); - } else { - CDEBUG(D_NETERROR, "only reduced %d out of %d rxs\n", i, count); - break; - } - } - 
spin_unlock(&kmxlnd_data.kmx_rxs_lock); - return 0; -} - -struct kmx_ctx * -mxlnd_get_idle_tx(void) -{ - struct list_head *tmp = NULL; - struct kmx_ctx *tx = NULL; - - spin_lock(&kmxlnd_data.kmx_tx_idle_lock); - - if (list_empty (&kmxlnd_data.kmx_tx_idle)) { - CDEBUG(D_NETERROR, "%d txs in use\n", kmxlnd_data.kmx_tx_used); - spin_unlock(&kmxlnd_data.kmx_tx_idle_lock); - return NULL; - } - - tmp = &kmxlnd_data.kmx_tx_idle; - tx = list_entry (tmp->next, struct kmx_ctx, mxc_list); - list_del_init(&tx->mxc_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... */ - tx->mxc_cookie = kmxlnd_data.kmx_tx_next_cookie++; - if (kmxlnd_data.kmx_tx_next_cookie > MXLND_MAX_COOKIE) { - kmxlnd_data.kmx_tx_next_cookie = 1; - } - kmxlnd_data.kmx_tx_used++; - spin_unlock(&kmxlnd_data.kmx_tx_idle_lock); - - LASSERT (tx->mxc_get == tx->mxc_put); - - tx->mxc_get++; - - LASSERT (tx->mxc_state == MXLND_CTX_IDLE); - LASSERT (tx->mxc_lntmsg[0] == NULL); - LASSERT (tx->mxc_lntmsg[1] == NULL); - - tx->mxc_state = MXLND_CTX_PREP; - - return tx; -} - -int -mxlnd_put_idle_tx(struct kmx_ctx *tx) -{ - //int failed = (tx->mxc_status.code != MX_STATUS_SUCCESS && tx->mxc_status.code != MX_STATUS_TRUNCATED); - int result = 0; - lnet_msg_t *lntmsg[2]; - - if (tx == NULL) { - CDEBUG(D_NETERROR, "called with NULL pointer\n"); - return -EINVAL; - } else if (tx->mxc_type != MXLND_REQ_TX) { - CDEBUG(D_NETERROR, "called with rx\n"); - return -EINVAL; - } - if (!(tx->mxc_status.code == MX_STATUS_SUCCESS || - tx->mxc_status.code == MX_STATUS_TRUNCATED)) - result = -EIO; - - lntmsg[0] = tx->mxc_lntmsg[0]; - lntmsg[1] = tx->mxc_lntmsg[1]; - - LASSERT(tx->mxc_get == tx->mxc_put + 1); - mxlnd_ctx_init(tx); - tx->mxc_put++; - spin_lock(&kmxlnd_data.kmx_tx_idle_lock); - list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle); - kmxlnd_data.kmx_tx_used--; - spin_unlock(&kmxlnd_data.kmx_tx_idle_lock); - if (lntmsg[0] != NULL) 
lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result); - if (lntmsg[1] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result); - return 0; -} - -/** - * mxlnd_conn_free - free the conn - * @conn - a kmx_conn pointer - * - * The calling function should remove the conn from the conns list first - * then destroy it. - */ -void -mxlnd_conn_free(struct kmx_conn *conn) -{ - struct kmx_peer *peer = conn->mxk_peer; - - CDEBUG(D_NET, "freeing conn 0x%p *****\n", conn); - LASSERT (list_empty (&conn->mxk_tx_credit_queue) && - list_empty (&conn->mxk_tx_free_queue) && - list_empty (&conn->mxk_pending)); - if (!list_empty(&conn->mxk_list)) { - spin_lock(&peer->mxp_lock); - list_del_init(&conn->mxk_list); - if (peer->mxp_conn == conn) { - peer->mxp_conn = NULL; - if (!(conn->mxk_epa.stuff[0] == 0 && conn->mxk_epa.stuff[1] == 0)) { - mx_set_endpoint_addr_context(conn->mxk_epa, - (void *) NULL); - } - } - spin_unlock(&peer->mxp_lock); - } - mxlnd_peer_decref(conn->mxk_peer); /* drop conn's ref to peer */ - MXLND_FREE (conn, sizeof (*conn)); - return; -} - - -void -mxlnd_conn_cancel_pending_rxs(struct kmx_conn *conn) -{ - int found = 0; - struct kmx_ctx *ctx = NULL; - struct kmx_ctx *next = NULL; - mx_return_t mxret = MX_SUCCESS; - u32 result = 0; - - do { - found = 0; - spin_lock(&conn->mxk_lock); - list_for_each_entry_safe(ctx, next, &conn->mxk_pending, mxc_list) { - /* we will delete all including txs */ - list_del_init(&ctx->mxc_list); - if (ctx->mxc_type == MXLND_REQ_RX) { - found = 1; - mxret = mx_cancel(kmxlnd_data.kmx_endpt, - &ctx->mxc_mxreq, - &result); - if (mxret != MX_SUCCESS) { - CDEBUG(D_NETERROR, "mx_cancel() returned %s (%d)\n", mx_strerror(mxret), mxret); - } - if (result == 1) { - ctx->mxc_status.code = -ECONNABORTED; - ctx->mxc_state = MXLND_CTX_CANCELED; - /* NOTE this calls lnet_finalize() and - * we cannot hold any locks when calling it. 
- * It also calls mxlnd_conn_decref(conn) */ - spin_unlock(&conn->mxk_lock); - mxlnd_handle_rx_completion(ctx); - spin_lock(&conn->mxk_lock); - } - break; - } - } - spin_unlock(&conn->mxk_lock); - } - while (found); - - return; -} - -/** - * mxlnd_conn_disconnect - shutdown a connection - * @conn - a kmx_conn pointer - * - * This function sets the status to DISCONNECT, completes queued - * txs with failure, calls mx_disconnect, which will complete - * pending txs and matched rxs with failure. - */ -void -mxlnd_conn_disconnect(struct kmx_conn *conn, int mx_dis, int notify) -{ - struct list_head *tmp = NULL; - - spin_lock(&conn->mxk_lock); - if (conn->mxk_status == MXLND_CONN_DISCONNECT) { - spin_unlock(&conn->mxk_lock); - return; - } - conn->mxk_status = MXLND_CONN_DISCONNECT; - conn->mxk_timeout = 0; - - while (!list_empty(&conn->mxk_tx_free_queue) || - !list_empty(&conn->mxk_tx_credit_queue)) { - - struct kmx_ctx *tx = NULL; - - if (!list_empty(&conn->mxk_tx_free_queue)) { - tmp = &conn->mxk_tx_free_queue; - } else { - tmp = &conn->mxk_tx_credit_queue; - } - - tx = list_entry(tmp->next, struct kmx_ctx, mxc_list); - list_del_init(&tx->mxc_list); - tx->mxc_status.code = -ECONNABORTED; - spin_unlock(&conn->mxk_lock); - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); /* for this tx */ - spin_lock(&conn->mxk_lock); - } - - spin_unlock(&conn->mxk_lock); - - /* cancel pending rxs */ - mxlnd_conn_cancel_pending_rxs(conn); - - if (kmxlnd_data.kmx_shutdown != 1) { - - if (mx_dis) mx_disconnect(kmxlnd_data.kmx_endpt, conn->mxk_epa); - - if (notify) { - time_t last_alive = 0; - unsigned long last_msg = 0; - - /* notify LNET that we are giving up on this peer */ - if (time_after(conn->mxk_last_rx, conn->mxk_last_tx)) { - last_msg = conn->mxk_last_rx; - } else { - last_msg = conn->mxk_last_tx; - } - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - last_msg); - lnet_notify(kmxlnd_data.kmx_ni, conn->mxk_peer->mxp_nid, 0, last_alive); - } - } - 
mxlnd_conn_decref(conn); /* drop the owning peer's reference */ - - return; -} - -/** - * mxlnd_conn_alloc - allocate and initialize a new conn struct - * @connp - address of a kmx_conn pointer - * @peer - owning kmx_peer - * - * Returns 0 on success and -ENOMEM on failure - */ -int -mxlnd_conn_alloc_locked(struct kmx_conn **connp, struct kmx_peer *peer) -{ - struct kmx_conn *conn = NULL; - - LASSERT(peer != NULL); - - MXLND_ALLOC(conn, sizeof (*conn)); - if (conn == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate conn\n"); - return -ENOMEM; - } - CDEBUG(D_NET, "allocated conn 0x%p for peer 0x%p\n", conn, peer); - - memset(conn, 0, sizeof(*conn)); - - /* conn->mxk_incarnation = 0 - will be set by peer */ - atomic_set(&conn->mxk_refcount, 2); /* ref for owning peer - and one for the caller */ - conn->mxk_peer = peer; - /* mxk_epa - to be set after mx_iconnect() */ - INIT_LIST_HEAD(&conn->mxk_list); - spin_lock_init(&conn->mxk_lock); - /* conn->mxk_timeout = 0 */ - conn->mxk_last_tx = jiffies; - conn->mxk_last_rx = conn->mxk_last_tx; - conn->mxk_credits = *kmxlnd_tunables.kmx_credits; - /* mxk_outstanding = 0 */ - conn->mxk_status = MXLND_CONN_INIT; - INIT_LIST_HEAD(&conn->mxk_tx_credit_queue); - INIT_LIST_HEAD(&conn->mxk_tx_free_queue); - /* conn->mxk_ntx_msgs = 0 */ - /* conn->mxk_ntx_data = 0 */ - /* conn->mxk_ntx_posted = 0 */ - /* conn->mxk_data_posted = 0 */ - INIT_LIST_HEAD(&conn->mxk_pending); - - *connp = conn; - - mxlnd_peer_addref(peer); /* add a ref for this conn */ - - /* add to front of peer's conns list */ - list_add(&conn->mxk_list, &peer->mxp_conns); - peer->mxp_conn = conn; - return 0; -} - -int -mxlnd_conn_alloc(struct kmx_conn **connp, struct kmx_peer *peer) -{ - int ret = 0; - spin_lock(&peer->mxp_lock); - ret = mxlnd_conn_alloc_locked(connp, peer); - spin_unlock(&peer->mxp_lock); - return ret; -} - -int -mxlnd_q_pending_ctx(struct kmx_ctx *ctx) -{ - int ret = 0; - struct kmx_conn *conn = ctx->mxc_conn; - - ctx->mxc_state = MXLND_CTX_PENDING; - if 
(conn != NULL) { - spin_lock(&conn->mxk_lock); - if (conn->mxk_status >= MXLND_CONN_INIT) { - list_add_tail(&ctx->mxc_list, &conn->mxk_pending); - if (conn->mxk_timeout == 0 || ctx->mxc_deadline < conn->mxk_timeout) { - conn->mxk_timeout = ctx->mxc_deadline; - } - } else { - ctx->mxc_state = MXLND_CTX_COMPLETED; - ret = -1; - } - spin_unlock(&conn->mxk_lock); - } - return ret; -} - -int -mxlnd_deq_pending_ctx(struct kmx_ctx *ctx) -{ - LASSERT(ctx->mxc_state == MXLND_CTX_PENDING || - ctx->mxc_state == MXLND_CTX_COMPLETED); - if (ctx->mxc_state != MXLND_CTX_PENDING && - ctx->mxc_state != MXLND_CTX_COMPLETED) { - CDEBUG(D_NETERROR, "deq ctx->mxc_state = %s\n", - mxlnd_ctxstate_to_str(ctx->mxc_state)); - } - ctx->mxc_state = MXLND_CTX_COMPLETED; - if (!list_empty(&ctx->mxc_list)) { - struct kmx_conn *conn = ctx->mxc_conn; - struct kmx_ctx *next = NULL; - LASSERT(conn != NULL); - spin_lock(&conn->mxk_lock); - list_del_init(&ctx->mxc_list); - conn->mxk_timeout = 0; - if (!list_empty(&conn->mxk_pending)) { - next = list_entry(conn->mxk_pending.next, struct kmx_ctx, mxc_list); - conn->mxk_timeout = next->mxc_deadline; - } - spin_unlock(&conn->mxk_lock); - } - return 0; -} - -/** - * mxlnd_peer_free - free the peer - * @peer - a kmx_peer pointer - * - * The calling function should decrement the rxs, drain the tx queues and - * remove the peer from the peers list first then destroy it. 
- */ -void -mxlnd_peer_free(struct kmx_peer *peer) -{ - CDEBUG(D_NET, "freeing peer 0x%p\n", peer); - - LASSERT (atomic_read(&peer->mxp_refcount) == 0); - - if (peer->mxp_host != NULL) { - spin_lock(&peer->mxp_host->mxh_lock); - peer->mxp_host->mxh_peer = NULL; - spin_unlock(&peer->mxp_host->mxh_lock); - } - if (!list_empty(&peer->mxp_peers)) { - /* assume we are locked */ - list_del_init(&peer->mxp_peers); - } - - MXLND_FREE (peer, sizeof (*peer)); - atomic_dec(&kmxlnd_data.kmx_npeers); - return; -} - -void -mxlnd_peer_hostname_to_nic_id(struct kmx_peer *peer) -{ - u64 nic_id = 0LL; - char name[MX_MAX_HOSTNAME_LEN + 1]; - mx_return_t mxret = MX_SUCCESS; - - memset(name, 0, sizeof(name)); - snprintf(name, sizeof(name), "%s:%d", peer->mxp_host->mxh_hostname, peer->mxp_host->mxh_board); - mxret = mx_hostname_to_nic_id(name, &nic_id); - if (mxret == MX_SUCCESS) { - peer->mxp_nic_id = nic_id; - } else { - CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s " - "with %s\n", name, mx_strerror(mxret)); - mxret = mx_hostname_to_nic_id(peer->mxp_host->mxh_hostname, &nic_id); - if (mxret == MX_SUCCESS) { - peer->mxp_nic_id = nic_id; - } else { - CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s " - "with %s\n", peer->mxp_host->mxh_hostname, - mx_strerror(mxret)); - } - } - return; -} - -/** - * mxlnd_peer_alloc - allocate and initialize a new peer struct - * @peerp - address of a kmx_peer pointer - * @nid - LNET node id - * - * Returns 0 on success and -ENOMEM on failure - */ -int -mxlnd_peer_alloc(struct kmx_peer **peerp, lnet_nid_t nid) -{ - int i = 0; - int ret = 0; - u32 addr = LNET_NIDADDR(nid); - struct kmx_peer *peer = NULL; - struct kmx_host *host = NULL; - - LASSERT (nid != LNET_NID_ANY && nid != 0LL); - - MXLND_ALLOC(peer, sizeof (*peer)); - if (peer == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate peer for NID 0x%llx\n", nid); - return -ENOMEM; - } - CDEBUG(D_NET, "allocated peer 0x%p for NID 0x%llx\n", peer, nid); - - memset(peer, 0, sizeof(*peer)); 
- - list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) { - if (addr == host->mxh_addr) { - peer->mxp_host = host; - spin_lock(&host->mxh_lock); - host->mxh_peer = peer; - spin_unlock(&host->mxh_lock); - break; - } - } - if (peer->mxp_host == NULL) { - CDEBUG(D_NETERROR, "unknown host for NID 0x%llx\n", nid); - MXLND_FREE(peer, sizeof(*peer)); - return -ENXIO; - } - - peer->mxp_nid = nid; - /* peer->mxp_incarnation */ - atomic_set(&peer->mxp_refcount, 1); /* ref for kmx_peers list */ - mxlnd_peer_hostname_to_nic_id(peer); - - INIT_LIST_HEAD(&peer->mxp_peers); - spin_lock_init(&peer->mxp_lock); - INIT_LIST_HEAD(&peer->mxp_conns); - ret = mxlnd_conn_alloc(&peer->mxp_conn, peer); /* adds 2nd conn ref here... */ - if (ret != 0) { - mxlnd_peer_decref(peer); - return ret; - } - - for (i = 0; i < *kmxlnd_tunables.kmx_credits - 1; i++) { - struct kmx_ctx *rx = NULL; - ret = mxlnd_ctx_alloc(&rx, MXLND_REQ_RX); - if (ret != 0) { - mxlnd_reduce_idle_rxs(i); - mxlnd_conn_decref(peer->mxp_conn); /* drop peer's ref... */ - mxlnd_conn_decref(peer->mxp_conn); /* drop this function's ref */ - mxlnd_peer_decref(peer); - return ret; - } - spin_lock(&kmxlnd_data.kmx_rxs_lock); - list_add_tail(&rx->mxc_global_list, &kmxlnd_data.kmx_rxs); - spin_unlock(&kmxlnd_data.kmx_rxs_lock); - rx->mxc_put = -1; - mxlnd_put_idle_rx(rx); - } - /* peer->mxp_reconnect_time = 0 */ - /* peer->mxp_incompatible = 0 */ - - *peerp = peer; - return 0; -} - -/** - * mxlnd_nid_to_hash - hash the nid - * @nid - msg pointer - * - * Takes the u64 nid and XORs the lowest N bits by the next lowest N bits. 
- */ -static inline int -mxlnd_nid_to_hash(lnet_nid_t nid) -{ - return (nid & MXLND_HASH_MASK) ^ - ((nid & (MXLND_HASH_MASK << MXLND_HASH_BITS)) >> MXLND_HASH_BITS); -} - -static inline struct kmx_peer * -mxlnd_find_peer_by_nid_locked(lnet_nid_t nid) -{ - int found = 0; - int hash = 0; - struct kmx_peer *peer = NULL; - - hash = mxlnd_nid_to_hash(nid); - - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[hash], mxp_peers) { - if (peer->mxp_nid == nid) { - found = 1; - mxlnd_peer_addref(peer); - break; - } - } - return (found ? peer : NULL); -} - -static inline struct kmx_peer * -mxlnd_find_peer_by_nid(lnet_nid_t nid) -{ - struct kmx_peer *peer = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - peer = mxlnd_find_peer_by_nid_locked(nid); - read_unlock(&kmxlnd_data.kmx_peers_lock); - return peer; -} - -static inline int -mxlnd_tx_requires_credit(struct kmx_ctx *tx) -{ - return (tx->mxc_msg_type == MXLND_MSG_EAGER || - tx->mxc_msg_type == MXLND_MSG_GET_REQ || - tx->mxc_msg_type == MXLND_MSG_PUT_REQ || - tx->mxc_msg_type == MXLND_MSG_NOOP); -} - -/** - * mxlnd_init_msg - set type and number of bytes - * @msg - msg pointer - * @type - of message - * @body_nob - bytes in msg body - */ -static inline void -mxlnd_init_msg(kmx_msg_t *msg, u8 type, int body_nob) -{ - msg->mxm_type = type; - msg->mxm_nob = offsetof(kmx_msg_t, mxm_u) + body_nob; -} - -static inline void -mxlnd_init_tx_msg (struct kmx_ctx *tx, u8 type, int body_nob, lnet_nid_t nid) -{ - int nob = offsetof (kmx_msg_t, mxm_u) + body_nob; - struct kmx_msg *msg = NULL; - - LASSERT (tx != NULL); - LASSERT (nob <= MXLND_EAGER_SIZE); - - tx->mxc_nid = nid; - /* tx->mxc_peer should have already been set if we know it */ - tx->mxc_msg_type = type; - tx->mxc_nseg = 1; - /* tx->mxc_seg.segment_ptr is already pointing to mxc_page */ - tx->mxc_seg.segment_length = nob; - tx->mxc_pin_type = MX_PIN_PHYSICAL; - //tx->mxc_state = MXLND_CTX_PENDING; - - msg = tx->mxc_msg; - msg->mxm_type = type; - msg->mxm_nob = nob; - - 
return; -} - -static inline __u32 -mxlnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 1 : sum; -} - -/** - * mxlnd_pack_msg - complete msg info - * @tx - msg to send - */ -static inline void -mxlnd_pack_msg(struct kmx_ctx *tx) -{ - struct kmx_msg *msg = tx->mxc_msg; - - /* type and nob should already be set in init_msg() */ - msg->mxm_magic = MXLND_MSG_MAGIC; - msg->mxm_version = MXLND_MSG_VERSION; - /* mxm_type */ - /* don't use mxlnd_tx_requires_credit() since we want PUT_ACK to - * return credits as well */ - if (tx->mxc_msg_type != MXLND_MSG_CONN_REQ && - tx->mxc_msg_type != MXLND_MSG_CONN_ACK) { - spin_lock(&tx->mxc_conn->mxk_lock); - msg->mxm_credits = tx->mxc_conn->mxk_outstanding; - tx->mxc_conn->mxk_outstanding = 0; - spin_unlock(&tx->mxc_conn->mxk_lock); - } else { - msg->mxm_credits = 0; - } - /* mxm_nob */ - msg->mxm_cksum = 0; - msg->mxm_srcnid = lnet_ptlcompat_srcnid(kmxlnd_data.kmx_ni->ni_nid, tx->mxc_nid); - msg->mxm_srcstamp = kmxlnd_data.kmx_incarnation; - msg->mxm_dstnid = tx->mxc_nid; - /* if it is a new peer, the dststamp will be 0 */ - msg->mxm_dststamp = tx->mxc_conn->mxk_incarnation; - msg->mxm_seq = tx->mxc_cookie; - - if (*kmxlnd_tunables.kmx_cksum) { - msg->mxm_cksum = mxlnd_cksum(msg, msg->mxm_nob); - } -} - -int -mxlnd_unpack_msg(kmx_msg_t *msg, int nob) -{ - const int hdr_size = offsetof(kmx_msg_t, mxm_u); - __u32 msg_cksum = 0; - int flip = 0; - int msg_nob = 0; - - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CDEBUG(D_NETERROR, "not enough bytes for magic + hdr: %d\n", nob); - return -EPROTO; - } - - if (msg->mxm_magic == MXLND_MSG_MAGIC) { - flip = 0; - } else if (msg->mxm_magic == __swab32(MXLND_MSG_MAGIC)) { - flip = 1; - } else { - CDEBUG(D_NETERROR, "Bad magic: %08x\n", msg->mxm_magic); - return -EPROTO; - } - - if (msg->mxm_version != - (flip 
? __swab16(MXLND_MSG_VERSION) : MXLND_MSG_VERSION)) { - CDEBUG(D_NETERROR, "Bad version: %d\n", msg->mxm_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CDEBUG(D_NETERROR, "not enough for a header: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->mxm_nob) : msg->mxm_nob; - if (msg_nob > nob) { - CDEBUG(D_NETERROR, "Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with mxm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? __swab32(msg->mxm_cksum) : msg->mxm_cksum; - msg->mxm_cksum = 0; - if (msg_cksum != 0 && msg_cksum != mxlnd_cksum(msg, msg_nob)) { - CDEBUG(D_NETERROR, "Bad checksum\n"); - return -EPROTO; - } - msg->mxm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - __swab16s(&msg->mxm_version); - CLASSERT (sizeof(msg->mxm_type) == 1); - CLASSERT (sizeof(msg->mxm_credits) == 1); - msg->mxm_nob = msg_nob; - __swab64s(&msg->mxm_srcnid); - __swab64s(&msg->mxm_srcstamp); - __swab64s(&msg->mxm_dstnid); - __swab64s(&msg->mxm_dststamp); - __swab64s(&msg->mxm_seq); - } - - if (msg->mxm_srcnid == LNET_NID_ANY) { - CDEBUG(D_NETERROR, "Bad src nid: %s\n", libcfs_nid2str(msg->mxm_srcnid)); - return -EPROTO; - } - - switch (msg->mxm_type) { - default: - CDEBUG(D_NETERROR, "Unknown message type %x\n", msg->mxm_type); - return -EPROTO; - - case MXLND_MSG_NOOP: - break; - - case MXLND_MSG_EAGER: - if (msg_nob < offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0])) { - CDEBUG(D_NETERROR, "Short EAGER: %d(%d)\n", msg_nob, - (int)offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0])); - return -EPROTO; - } - break; - - case MXLND_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->mxm_u.put_req)) { - CDEBUG(D_NETERROR, "Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.put_req))); - return -EPROTO; - } - if (flip) - __swab64s(&msg->mxm_u.put_req.mxprm_cookie); - break; - - case MXLND_MSG_PUT_ACK: - if (msg_nob 
< hdr_size + sizeof(msg->mxm_u.put_ack)) { - CDEBUG(D_NETERROR, "Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.put_ack))); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->mxm_u.put_ack.mxpam_src_cookie); - __swab64s(&msg->mxm_u.put_ack.mxpam_dst_cookie); - } - break; - - case MXLND_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->mxm_u.get_req)) { - CDEBUG(D_NETERROR, "Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.get_req))); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->mxm_u.get_req.mxgrm_cookie); - } - break; - - case MXLND_MSG_CONN_REQ: - case MXLND_MSG_CONN_ACK: - if (msg_nob < hdr_size + sizeof(msg->mxm_u.conn_req)) { - CDEBUG(D_NETERROR, "Short connreq/ack: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.conn_req))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->mxm_u.conn_req.mxcrm_queue_depth); - __swab32s(&msg->mxm_u.conn_req.mxcrm_eager_size); - } - break; - } - return 0; -} - -/** - * mxlnd_recv_msg - * @lntmsg - the LNET msg that this is continuing. If EAGER, then NULL. - * @rx - * @msg_type - * @cookie - * @length - length of incoming message - * @pending - add to kmx_pending (0 is NO and 1 is YES) - * - * The caller gets the rx and sets nid, peer and conn if known. 
- * - * Returns 0 on success and -1 on failure - */ -int -mxlnd_recv_msg(lnet_msg_t *lntmsg, struct kmx_ctx *rx, u8 msg_type, u64 cookie, u32 length) -{ - int ret = 0; - mx_return_t mxret = MX_SUCCESS; - uint64_t mask = 0xF00FFFFFFFFFFFFFLL; - - rx->mxc_msg_type = msg_type; - rx->mxc_lntmsg[0] = lntmsg; /* may be NULL if EAGER */ - rx->mxc_cookie = cookie; - /* rx->mxc_match may already be set */ - /* rx->mxc_seg.segment_ptr is already set */ - rx->mxc_seg.segment_length = length; - rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT; - ret = mxlnd_q_pending_ctx(rx); - if (ret == -1) { - /* the caller is responsible for calling conn_decref() if needed */ - return -1; - } - mxret = mx_kirecv(kmxlnd_data.kmx_endpt, &rx->mxc_seg, 1, MX_PIN_PHYSICAL, - cookie, mask, (void *) rx, &rx->mxc_mxreq); - if (mxret != MX_SUCCESS) { - mxlnd_deq_pending_ctx(rx); - CDEBUG(D_NETERROR, "mx_kirecv() failed with %s (%d)\n", - mx_strerror(mxret), (int) mxret); - return -1; - } - return 0; -} - - -/** - * mxlnd_unexpected_recv - this is the callback function that will handle - * unexpected receives - * @context - NULL, ignore - * @source - the peer's mx_endpoint_addr_t - * @match_value - the msg's bit, should be MXLND_MASK_EAGER - * @length - length of incoming message - * @data_if_available - ignore - * - * If it is an eager-sized msg, we will call recv_msg() with the actual - * length. If it is a large message, we will call recv_msg() with a - * length of 0 bytes to drop it because we should never have a large, - * unexpected message. - * - * NOTE - The MX library blocks until this function completes. Make it as fast as - * possible. DO NOT allocate memory which can block! - * - * If we cannot get a rx or the conn is closed, drop the message on the floor - * (i.e. recv 0 bytes and ignore). 
- */ -mx_unexp_handler_action_t -mxlnd_unexpected_recv(void *context, mx_endpoint_addr_t source, - uint64_t match_value, uint32_t length, void *data_if_available) -{ - int ret = 0; - struct kmx_ctx *rx = NULL; - mx_ksegment_t seg; - u8 msg_type = 0; - u8 error = 0; - u64 cookie = 0LL; - - if (context != NULL) { - CDEBUG(D_NETERROR, "unexpected receive with non-NULL context\n"); - } - -#if MXLND_DEBUG - CDEBUG(D_NET, "unexpected_recv() bits=0x%llx length=%d\n", match_value, length); -#endif - - rx = mxlnd_get_idle_rx(); - if (rx != NULL) { - mxlnd_parse_match(match_value, &msg_type, &error, &cookie); - if (length <= MXLND_EAGER_SIZE) { - ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, length); - } else { - CDEBUG(D_NETERROR, "unexpected large receive with " - "match_value=0x%llx length=%d\n", - match_value, length); - ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, 0); - } - - if (ret == 0) { - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - /* NOTE to avoid a peer disappearing out from under us, - * read lock the peers lock first */ - read_lock(&kmxlnd_data.kmx_peers_lock); - mx_get_endpoint_addr_context(source, (void **) &peer); - if (peer != NULL) { - mxlnd_peer_addref(peer); /* add a ref... 
*/ - spin_lock(&peer->mxp_lock); - conn = peer->mxp_conn; - if (conn) { - mxlnd_conn_addref(conn); /* add ref until rx completed */ - mxlnd_peer_decref(peer); /* and drop peer ref */ - rx->mxc_conn = conn; - } - spin_unlock(&peer->mxp_lock); - rx->mxc_peer = peer; - rx->mxc_nid = peer->mxp_nid; - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - } else { - CDEBUG(D_NETERROR, "could not post receive\n"); - mxlnd_put_idle_rx(rx); - } - } - - if (rx == NULL || ret != 0) { - if (rx == NULL) { - CDEBUG(D_NETERROR, "no idle rxs available - dropping rx\n"); - } else { - /* ret != 0 */ - CDEBUG(D_NETERROR, "disconnected peer - dropping rx\n"); - } - seg.segment_ptr = 0LL; - seg.segment_length = 0; - mx_kirecv(kmxlnd_data.kmx_endpt, &seg, 1, MX_PIN_PHYSICAL, - match_value, 0xFFFFFFFFFFFFFFFFLL, NULL, NULL); - } - - return MX_RECV_CONTINUE; -} - - -int -mxlnd_get_peer_info(int index, lnet_nid_t *nidp, int *count) -{ - int i = 0; - int ret = -ENOENT; - struct kmx_peer *peer = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - if (index-- > 0) - continue; - - *nidp = peer->mxp_nid; - *count = atomic_read(&peer->mxp_refcount); - ret = 0; - break; - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - return ret; -} - -void -mxlnd_del_peer_locked(struct kmx_peer *peer) -{ - list_del_init(&peer->mxp_peers); /* remove from the global list */ - if (peer->mxp_conn) mxlnd_conn_disconnect(peer->mxp_conn, 1, 0); - mxlnd_peer_decref(peer); /* drop global list ref */ - return; -} - -int -mxlnd_del_peer(lnet_nid_t nid) -{ - int i = 0; - int ret = 0; - struct kmx_peer *peer = NULL; - struct kmx_peer *next = NULL; - - if (nid != LNET_NID_ANY) { - peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */ - } - write_lock(&kmxlnd_data.kmx_peers_lock); - if (nid != LNET_NID_ANY) { - if (peer == NULL) { - ret = -ENOENT; - } else { - mxlnd_peer_decref(peer); /* and drops it */ - 
mxlnd_del_peer_locked(peer); - } - } else { /* LNET_NID_ANY */ - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry_safe(peer, next, - &kmxlnd_data.kmx_peers[i], mxp_peers) { - mxlnd_del_peer_locked(peer); - } - } - } - write_unlock(&kmxlnd_data.kmx_peers_lock); - - return ret; -} - -struct kmx_conn * -mxlnd_get_conn_by_idx(int index) -{ - int i = 0; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - spin_lock(&peer->mxp_lock); - list_for_each_entry(conn, &peer->mxp_conns, mxk_list) { - if (index-- > 0) { - continue; - } - - mxlnd_conn_addref(conn); /* add ref here, dec in ctl() */ - spin_unlock(&peer->mxp_lock); - read_unlock(&kmxlnd_data.kmx_peers_lock); - return conn; - } - spin_unlock(&peer->mxp_lock); - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - return NULL; -} - -void -mxlnd_close_matching_conns_locked(struct kmx_peer *peer) -{ - struct kmx_conn *conn = NULL; - struct kmx_conn *next = NULL; - - spin_lock(&peer->mxp_lock); - list_for_each_entry_safe(conn, next, &peer->mxp_conns, mxk_list) { - mxlnd_conn_disconnect(conn, 0 , 0); - } - spin_unlock(&peer->mxp_lock); - return; -} - -int -mxlnd_close_matching_conns(lnet_nid_t nid) -{ - int i = 0; - int ret = 0; - struct kmx_peer *peer = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - if (nid != LNET_NID_ANY) { - peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */ - if (peer == NULL) { - ret = -ENOENT; - } else { - mxlnd_close_matching_conns_locked(peer); - mxlnd_peer_decref(peer); /* and drops it here */ - } - } else { /* LNET_NID_ANY */ - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) - mxlnd_close_matching_conns_locked(peer); - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - return ret; -} - -/** - * mxlnd_ctl - modify MXLND parameters - * @ni - LNET 
interface handle - * @cmd - command to change - * @arg - the ioctl data - * - * Not implemented yet. - */ -int -mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int ret = -EINVAL; - - LASSERT (ni == kmxlnd_data.kmx_ni); - - switch (cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int count = 0; - - ret = mxlnd_get_peer_info(data->ioc_count, &nid, &count); - data->ioc_nid = nid; - data->ioc_count = count; - break; - } - case IOC_LIBCFS_DEL_PEER: { - ret = mxlnd_del_peer(data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - struct kmx_conn *conn = NULL; - - conn = mxlnd_get_conn_by_idx(data->ioc_count); - if (conn == NULL) { - ret = -ENOENT; - } else { - ret = 0; - data->ioc_nid = conn->mxk_peer->mxp_nid; - mxlnd_conn_decref(conn); /* dec ref taken in get_conn_by_idx() */ - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - ret = mxlnd_close_matching_conns(data->ioc_nid); - break; - } - default: - CDEBUG(D_NETERROR, "unknown ctl(%d)\n", cmd); - break; - } - - return ret; -} - -/** - * mxlnd_peer_queue_tx_locked - add the tx to the global tx queue - * @tx - * - * Add the tx to the peer's msg or data queue. The caller has locked the peer. 
- */ -void -mxlnd_peer_queue_tx_locked(struct kmx_ctx *tx) -{ - u8 msg_type = tx->mxc_msg_type; - //struct kmx_peer *peer = tx->mxc_peer; - struct kmx_conn *conn = tx->mxc_conn; - - LASSERT (msg_type != 0); - LASSERT (tx->mxc_nid != 0); - LASSERT (tx->mxc_peer != NULL); - LASSERT (tx->mxc_conn != NULL); - - tx->mxc_incarnation = conn->mxk_incarnation; - - if (msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA) { - /* msg style tx */ - if (mxlnd_tx_requires_credit(tx)) { - list_add_tail(&tx->mxc_list, &conn->mxk_tx_credit_queue); - conn->mxk_ntx_msgs++; - } else if (msg_type == MXLND_MSG_CONN_REQ || - msg_type == MXLND_MSG_CONN_ACK) { - /* put conn msgs at the front of the queue */ - list_add(&tx->mxc_list, &conn->mxk_tx_free_queue); - } else { - /* PUT_ACK, PUT_NAK */ - list_add_tail(&tx->mxc_list, &conn->mxk_tx_free_queue); - conn->mxk_ntx_msgs++; - } - } else { - /* data style tx */ - list_add_tail(&tx->mxc_list, &conn->mxk_tx_free_queue); - conn->mxk_ntx_data++; - } - - return; -} - -/** - * mxlnd_peer_queue_tx - add the tx to the global tx queue - * @tx - * - * Add the tx to the peer's msg or data queue - */ -static inline void -mxlnd_peer_queue_tx(struct kmx_ctx *tx) -{ - LASSERT(tx->mxc_peer != NULL); - LASSERT(tx->mxc_conn != NULL); - spin_lock(&tx->mxc_conn->mxk_lock); - mxlnd_peer_queue_tx_locked(tx); - spin_unlock(&tx->mxc_conn->mxk_lock); - - return; -} - -/** - * mxlnd_queue_tx - add the tx to the global tx queue - * @tx - * - * Add the tx to the global queue and up the tx_queue_sem - */ -void -mxlnd_queue_tx(struct kmx_ctx *tx) -{ - struct kmx_peer *peer = tx->mxc_peer; - LASSERT (tx->mxc_nid != 0); - - if (peer != NULL) { - if (peer->mxp_incompatible && - tx->mxc_msg_type != MXLND_MSG_CONN_ACK) { - /* let this fail now */ - tx->mxc_status.code = -ECONNABORTED; - mxlnd_conn_decref(peer->mxp_conn); - mxlnd_put_idle_tx(tx); - return; - } - if (tx->mxc_conn == NULL) { - int ret = 0; - struct kmx_conn *conn = NULL; - - ret = 
mxlnd_conn_alloc(&conn, peer); /* adds 2nd ref for tx... */ - if (ret != 0) { - tx->mxc_status.code = ret; - mxlnd_put_idle_tx(tx); - goto done; - } - tx->mxc_conn = conn; - mxlnd_peer_decref(peer); /* and takes it from peer */ - } - LASSERT(tx->mxc_conn != NULL); - mxlnd_peer_queue_tx(tx); - mxlnd_check_sends(peer); - } else { - spin_lock(&kmxlnd_data.kmx_tx_queue_lock); - list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_queue); - spin_unlock(&kmxlnd_data.kmx_tx_queue_lock); - up(&kmxlnd_data.kmx_tx_queue_sem); - } -done: - return; -} - -int -mxlnd_setup_iov(struct kmx_ctx *ctx, u32 niov, struct iovec *iov, u32 offset, u32 nob) -{ - int i = 0; - int sum = 0; - int old_sum = 0; - int nseg = 0; - int first_iov = -1; - int first_iov_offset = 0; - int first_found = 0; - int last_iov = -1; - int last_iov_length = 0; - mx_ksegment_t *seg = NULL; - - if (niov == 0) return 0; - LASSERT(iov != NULL); - - for (i = 0; i < niov; i++) { - sum = old_sum + (u32) iov[i].iov_len; - if (!first_found && (sum > offset)) { - first_iov = i; - first_iov_offset = offset - old_sum; - first_found = 1; - sum = (u32) iov[i].iov_len - first_iov_offset; - old_sum = 0; - } - if (sum >= nob) { - last_iov = i; - last_iov_length = (u32) iov[i].iov_len - (sum - nob); - if (first_iov == last_iov) last_iov_length -= first_iov_offset; - break; - } - old_sum = sum; - } - LASSERT(first_iov >= 0 && last_iov >= first_iov); - nseg = last_iov - first_iov + 1; - LASSERT(nseg > 0); - - MXLND_ALLOC (seg, nseg * sizeof(*seg)); - if (seg == NULL) { - CDEBUG(D_NETERROR, "MXLND_ALLOC() failed\n"); - return -1; - } - memset(seg, 0, nseg * sizeof(*seg)); - ctx->mxc_nseg = nseg; - sum = 0; - for (i = 0; i < nseg; i++) { - seg[i].segment_ptr = MX_KVA_TO_U64(iov[first_iov + i].iov_base); - seg[i].segment_length = (u32) iov[first_iov + i].iov_len; - if (i == 0) { - seg[i].segment_ptr += (u64) first_iov_offset; - seg[i].segment_length -= (u32) first_iov_offset; - } - if (i == (nseg - 1)) { - seg[i].segment_length = (u32) 
last_iov_length; - } - sum += seg[i].segment_length; - } - ctx->mxc_seg_list = seg; - ctx->mxc_pin_type = MX_PIN_KERNEL; -#ifdef MX_PIN_FULLPAGES - ctx->mxc_pin_type |= MX_PIN_FULLPAGES; -#endif - LASSERT(nob == sum); - return 0; -} - -int -mxlnd_setup_kiov(struct kmx_ctx *ctx, u32 niov, lnet_kiov_t *kiov, u32 offset, u32 nob) -{ - int i = 0; - int sum = 0; - int old_sum = 0; - int nseg = 0; - int first_kiov = -1; - int first_kiov_offset = 0; - int first_found = 0; - int last_kiov = -1; - int last_kiov_length = 0; - mx_ksegment_t *seg = NULL; - - if (niov == 0) return 0; - LASSERT(kiov != NULL); - - for (i = 0; i < niov; i++) { - sum = old_sum + kiov[i].kiov_len; - if (i == 0) sum -= kiov[i].kiov_offset; - if (!first_found && (sum > offset)) { - first_kiov = i; - first_kiov_offset = offset - old_sum; - //if (i == 0) first_kiov_offset + kiov[i].kiov_offset; - if (i == 0) first_kiov_offset = kiov[i].kiov_offset; - first_found = 1; - sum = kiov[i].kiov_len - first_kiov_offset; - old_sum = 0; - } - if (sum >= nob) { - last_kiov = i; - last_kiov_length = kiov[i].kiov_len - (sum - nob); - if (first_kiov == last_kiov) last_kiov_length -= first_kiov_offset; - break; - } - old_sum = sum; - } - LASSERT(first_kiov >= 0 && last_kiov >= first_kiov); - nseg = last_kiov - first_kiov + 1; - LASSERT(nseg > 0); - - MXLND_ALLOC (seg, nseg * sizeof(*seg)); - if (seg == NULL) { - CDEBUG(D_NETERROR, "MXLND_ALLOC() failed\n"); - return -1; - } - memset(seg, 0, niov * sizeof(*seg)); - ctx->mxc_nseg = niov; - sum = 0; - for (i = 0; i < niov; i++) { - seg[i].segment_ptr = lnet_page2phys(kiov[first_kiov + i].kiov_page); - seg[i].segment_length = kiov[first_kiov + i].kiov_len; - if (i == 0) { - seg[i].segment_ptr += (u64) first_kiov_offset; - /* we have to add back the original kiov_offset */ - seg[i].segment_length -= first_kiov_offset + - kiov[first_kiov].kiov_offset; - } - if (i == (nseg - 1)) { - seg[i].segment_length = last_kiov_length; - } - sum += seg[i].segment_length; - } - 
ctx->mxc_seg_list = seg; - ctx->mxc_pin_type = MX_PIN_PHYSICAL; -#ifdef MX_PIN_FULLPAGES - ctx->mxc_pin_type |= MX_PIN_FULLPAGES; -#endif - LASSERT(nob == sum); - return 0; -} - -void -mxlnd_send_nak(struct kmx_ctx *tx, lnet_nid_t nid, int type, int status, __u64 cookie) -{ - LASSERT(type == MXLND_MSG_PUT_ACK); - mxlnd_init_tx_msg(tx, type, sizeof(kmx_putack_msg_t), tx->mxc_nid); - tx->mxc_cookie = cookie; - tx->mxc_msg->mxm_u.put_ack.mxpam_src_cookie = cookie; - tx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie = ((u64) status << 52); /* error code */ - tx->mxc_match = mxlnd_create_match(tx, status); - - mxlnd_queue_tx(tx); -} - - -/** - * mxlnd_send_data - get tx, map [k]iov, queue tx - * @ni - * @lntmsg - * @peer - * @msg_type - * @cookie - * - * This setups the DATA send for PUT or GET. - * - * On success, it queues the tx, on failure it calls lnet_finalize() - */ -void -mxlnd_send_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, struct kmx_peer *peer, u8 msg_type, u64 cookie) -{ - int ret = 0; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - struct kmx_ctx *tx = NULL; - - LASSERT(lntmsg != NULL); - LASSERT(peer != NULL); - LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA); - LASSERT((cookie>>52) == 0); - - tx = mxlnd_get_idle_tx(); - if (tx == NULL) { - CDEBUG(D_NETERROR, "Can't allocate %s tx for %s\n", - msg_type == MXLND_MSG_PUT_DATA ? 
"PUT_DATA" : "GET_DATA", - libcfs_nid2str(target.nid)); - goto failed_0; - } - tx->mxc_nid = target.nid; - /* NOTE called when we have a ref on the conn, get one for this tx */ - mxlnd_conn_addref(peer->mxp_conn); - tx->mxc_peer = peer; - tx->mxc_conn = peer->mxp_conn; - tx->mxc_msg_type = msg_type; - tx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT; - tx->mxc_state = MXLND_CTX_PENDING; - tx->mxc_lntmsg[0] = lntmsg; - tx->mxc_cookie = cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - /* This setups up the mx_ksegment_t to send the DATA payload */ - if (nob == 0) { - /* do not setup the segments */ - CDEBUG(D_NETERROR, "nob = 0; why didn't we use an EAGER reply " - "to %s?\n", libcfs_nid2str(target.nid)); - ret = 0; - } else if (kiov == NULL) { - ret = mxlnd_setup_iov(tx, niov, iov, offset, nob); - } else { - ret = mxlnd_setup_kiov(tx, niov, kiov, offset, nob); - } - if (ret != 0) { - CDEBUG(D_NETERROR, "Can't setup send DATA for %s\n", - libcfs_nid2str(target.nid)); - tx->mxc_status.code = -EIO; - goto failed_1; - } - mxlnd_queue_tx(tx); - return; - -failed_1: - mxlnd_conn_decref(peer->mxp_conn); - mxlnd_put_idle_tx(tx); - return; - -failed_0: - CDEBUG(D_NETERROR, "no tx avail\n"); - lnet_finalize(ni, lntmsg, -EIO); - return; -} - -/** - * mxlnd_recv_data - map [k]iov, post rx - * @ni - * @lntmsg - * @rx - * @msg_type - * @cookie - * - * This setups the DATA receive for PUT or GET. 
- * - * On success, it returns 0, on failure it returns -1 - */ -int -mxlnd_recv_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, struct kmx_ctx *rx, u8 msg_type, u64 cookie) -{ - int ret = 0; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - mx_return_t mxret = MX_SUCCESS; - - /* above assumes MXLND_MSG_PUT_DATA */ - if (msg_type == MXLND_MSG_GET_DATA) { - niov = lntmsg->msg_md->md_niov; - iov = lntmsg->msg_md->md_iov.iov; - kiov = lntmsg->msg_md->md_iov.kiov; - offset = 0; - nob = lntmsg->msg_md->md_length; - } - - LASSERT(lntmsg != NULL); - LASSERT(rx != NULL); - LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA); - LASSERT((cookie>>52) == 0); /* ensure top 12 bits are 0 */ - - rx->mxc_msg_type = msg_type; - rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT; - rx->mxc_state = MXLND_CTX_PENDING; - rx->mxc_nid = target.nid; - /* if posting a GET_DATA, we may not yet know the peer */ - if (rx->mxc_peer != NULL) { - rx->mxc_conn = rx->mxc_peer->mxp_conn; - } - rx->mxc_lntmsg[0] = lntmsg; - rx->mxc_cookie = cookie; - rx->mxc_match = mxlnd_create_match(rx, 0); - /* This setups up the mx_ksegment_t to receive the DATA payload */ - if (kiov == NULL) { - ret = mxlnd_setup_iov(rx, niov, iov, offset, nob); - } else { - ret = mxlnd_setup_kiov(rx, niov, kiov, offset, nob); - } - if (msg_type == MXLND_MSG_GET_DATA) { - rx->mxc_lntmsg[1] = lnet_create_reply_msg(kmxlnd_data.kmx_ni, lntmsg); - if (rx->mxc_lntmsg[1] == NULL) { - CDEBUG(D_NETERROR, "Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - ret = -1; - } - } - if (ret != 0) { - CDEBUG(D_NETERROR, "Can't setup %s rx for %s\n", - msg_type == MXLND_MSG_PUT_DATA ? 
"PUT_DATA" : "GET_DATA", - libcfs_nid2str(target.nid)); - return -1; - } - ret = mxlnd_q_pending_ctx(rx); - if (ret == -1) { - return -1; - } - CDEBUG(D_NET, "receiving %s 0x%llx\n", mxlnd_msgtype_to_str(msg_type), rx->mxc_cookie); - mxret = mx_kirecv(kmxlnd_data.kmx_endpt, - rx->mxc_seg_list, rx->mxc_nseg, - rx->mxc_pin_type, rx->mxc_match, - 0xF00FFFFFFFFFFFFFLL, (void *) rx, - &rx->mxc_mxreq); - if (mxret != MX_SUCCESS) { - if (rx->mxc_conn != NULL) { - mxlnd_deq_pending_ctx(rx); - } - CDEBUG(D_NETERROR, "mx_kirecv() failed with %d for %s\n", - (int) mxret, libcfs_nid2str(target.nid)); - return -1; - } - - return 0; -} - -/** - * mxlnd_send - the LND required send function - * @ni - * @private - * @lntmsg - * - * This must not block. Since we may not have a peer struct for the receiver, - * it will append send messages on a global tx list. We will then up the - * tx_queued's semaphore to notify it of the new send. - */ -int -mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - int ret = 0; - int type = lntmsg->msg_type; - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - lnet_process_id_t target = lntmsg->msg_target; - lnet_nid_t nid = target.nid; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_ctx *rx = (struct kmx_ctx *) private; /* for REPLY */ - struct kmx_ctx *rx_data = NULL; - struct kmx_conn *conn = NULL; - int nob = 0; - uint32_t length = 0; - struct kmx_peer *peer = NULL; - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - /* payload is either all 
vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - /* private is used on LNET_GET_REPLY only, NULL for all other cases */ - - /* NOTE we may not know the peer if it is the very first PUT_REQ or GET_REQ - * to a new peer, use the nid */ - peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */ - if (peer != NULL) { - if (unlikely(peer->mxp_incompatible)) { - mxlnd_peer_decref(peer); /* drop ref taken above */ - } else { - spin_lock(&peer->mxp_lock); - conn = peer->mxp_conn; - if (conn) { - mxlnd_conn_addref(conn); - mxlnd_peer_decref(peer); /* drop peer ref taken above */ - } - spin_unlock(&peer->mxp_lock); - } - } - if (conn == NULL && peer != NULL) { - CDEBUG(D_NETERROR, "conn==NULL peer=0x%p nid=0x%llx payload_nob=%d type=%s\n", - peer, nid, payload_nob, mxlnd_lnetmsg_to_str(type)); - } - - switch (type) { - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need DATA? */ - nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob]); - if (nob <= MXLND_EAGER_SIZE) - break; /* send EAGER */ - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate %s tx for %s\n", - type == LNET_MSG_PUT ? "PUT" : "REPLY", - libcfs_nid2str(nid)); - if (conn) mxlnd_conn_decref(conn); - return -ENOMEM; - } - - /* the peer may be NULL */ - tx->mxc_peer = peer; - tx->mxc_conn = conn; /* may be NULL */ - /* we added a conn ref above */ - mxlnd_init_tx_msg (tx, MXLND_MSG_PUT_REQ, sizeof(kmx_putreq_msg_t), nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.put_req.mxprm_hdr = *hdr; - txmsg->mxm_u.put_req.mxprm_cookie = tx->mxc_cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - /* we must post a receive _before_ sending the request. - * we need to determine how much to receive, it will be either - * a put_ack or a put_nak. The put_ack is larger, so use it. 
*/ - - rx = mxlnd_get_idle_rx(); - if (unlikely(rx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate rx for PUT_ACK for %s\n", - libcfs_nid2str(nid)); - mxlnd_put_idle_tx(tx); - if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */ - return -ENOMEM; - } - rx->mxc_nid = nid; - rx->mxc_peer = peer; - /* conn may be NULL but unlikely since the first msg is always small */ - /* NOTE no need to lock peer before adding conn ref since we took - * a conn ref for the tx (it cannot be freed between there and here ) */ - if (conn) mxlnd_conn_addref(conn); /* for this rx */ - rx->mxc_conn = conn; - rx->mxc_msg_type = MXLND_MSG_PUT_ACK; - rx->mxc_cookie = tx->mxc_cookie; - rx->mxc_match = mxlnd_create_match(rx, 0); - - length = offsetof(kmx_msg_t, mxm_u) + sizeof(kmx_putack_msg_t); - ret = mxlnd_recv_msg(lntmsg, rx, MXLND_MSG_PUT_ACK, rx->mxc_match, length); - if (unlikely(ret != 0)) { - CDEBUG(D_NETERROR, "recv_msg() failed for PUT_ACK for %s\n", - libcfs_nid2str(nid)); - rx->mxc_lntmsg[0] = NULL; - mxlnd_put_idle_rx(rx); - mxlnd_put_idle_tx(tx); - if (conn) { - mxlnd_conn_decref(conn); /* for the rx... */ - mxlnd_conn_decref(conn); /* and for the tx */ - } - return -EHOSTUNREACH; - } - - mxlnd_queue_tx(tx); - return 0; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send EAGER */ - - /* is the REPLY message too small for DATA? 
*/ - nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[lntmsg->msg_md->md_length]); - if (nob <= MXLND_EAGER_SIZE) - break; /* send EAGER */ - - /* get tx (we need the cookie) , post rx for incoming DATA, - * then post GET_REQ tx */ - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate GET tx for %s\n", - libcfs_nid2str(nid)); - if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */ - return -ENOMEM; - } - rx_data = mxlnd_get_idle_rx(); - if (unlikely(rx_data == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate DATA rx for %s\n", - libcfs_nid2str(nid)); - mxlnd_put_idle_tx(tx); - if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */ - return -ENOMEM; - } - rx_data->mxc_peer = peer; - /* NOTE no need to lock peer before adding conn ref since we took - * a conn ref for the tx (it cannot be freed between there and here ) */ - if (conn) mxlnd_conn_addref(conn); /* for the rx_data */ - rx_data->mxc_conn = conn; /* may be NULL */ - - ret = mxlnd_recv_data(ni, lntmsg, rx_data, MXLND_MSG_GET_DATA, tx->mxc_cookie); - if (unlikely(ret != 0)) { - CDEBUG(D_NETERROR, "Can't setup GET sink for %s\n", - libcfs_nid2str(nid)); - mxlnd_put_idle_rx(rx_data); - mxlnd_put_idle_tx(tx); - if (conn) { - mxlnd_conn_decref(conn); /* for the rx_data... 
*/ - mxlnd_conn_decref(conn); /* and for the tx */ - } - return -EIO; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; /* may be NULL */ - /* conn ref taken above */ - mxlnd_init_tx_msg(tx, MXLND_MSG_GET_REQ, sizeof(kmx_getreq_msg_t), nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.get_req.mxgrm_hdr = *hdr; - txmsg->mxm_u.get_req.mxgrm_cookie = tx->mxc_cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - mxlnd_queue_tx(tx); - return 0; - - default: - LBUG(); - if (conn) mxlnd_conn_decref(conn); /* drop ref taken above */ - return -EIO; - } - - /* send EAGER */ - - LASSERT (offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob]) - <= MXLND_EAGER_SIZE); - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't send %s to %s: tx descs exhausted\n", - mxlnd_lnetmsg_to_str(type), libcfs_nid2str(nid)); - if (conn) mxlnd_conn_decref(conn); /* drop ref taken above */ - return -ENOMEM; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; /* may be NULL */ - /* conn ref taken above */ - nob = offsetof(kmx_eager_msg_t, mxem_payload[payload_nob]); - mxlnd_init_tx_msg (tx, MXLND_MSG_EAGER, nob, nid); - tx->mxc_match = mxlnd_create_match(tx, 0); - - txmsg = tx->mxc_msg; - txmsg->mxm_u.eager.mxem_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(MXLND_EAGER_SIZE, txmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - payload_niov, payload_kiov, payload_offset, payload_nob); - else - lnet_copy_iov2flat(MXLND_EAGER_SIZE, txmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - payload_niov, payload_iov, payload_offset, payload_nob); - - tx->mxc_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - mxlnd_queue_tx(tx); - return 0; -} - -/** - * mxlnd_recv - the LND required recv function - * @ni - * @private - * @lntmsg - * @delayed - * @niov - * @kiov - * @offset - * @mlen - * @rlen - * - * This must not block. 
- */ -int -mxlnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - int ret = 0; - int nob = 0; - int len = 0; - struct kmx_ctx *rx = private; - struct kmx_msg *rxmsg = rx->mxc_msg; - lnet_nid_t nid = rx->mxc_nid; - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_peer *peer = rx->mxc_peer; - struct kmx_conn *conn = peer->mxp_conn; - u64 cookie = 0LL; - int msg_type = rxmsg->mxm_type; - int repost = 1; - int credit = 0; - int finalize = 0; - - LASSERT (mlen <= rlen); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - LASSERT (peer != NULL); - - /* conn_addref(conn) already taken for the primary rx */ - - switch (msg_type) { - case MXLND_MSG_EAGER: - nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[rlen]); - len = rx->mxc_status.xfer_length; - if (unlikely(nob > len)) { - CDEBUG(D_NETERROR, "Eager message from %s too big: %d(%d)\n", - libcfs_nid2str(nid), nob, len); - ret = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - MXLND_EAGER_SIZE, rxmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - MXLND_EAGER_SIZE, rxmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - mlen); - finalize = 1; - credit = 1; - break; - - case MXLND_MSG_PUT_REQ: - /* we are going to reuse the rx, store the needed info */ - cookie = rxmsg->mxm_u.put_req.mxprm_cookie; - - /* get tx, post rx, send PUT_ACK */ - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate tx for %s\n", libcfs_nid2str(nid)); - /* Not replying will break the connection */ - ret = -ENOMEM; - break; - } - if (unlikely(mlen == 0)) { - finalize = 1; - tx->mxc_peer = peer; - tx->mxc_conn = conn; - mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, 0, cookie); - /* repost = 1 */ - break; - } - - 
mxlnd_init_tx_msg(tx, MXLND_MSG_PUT_ACK, sizeof(kmx_putack_msg_t), nid); - tx->mxc_peer = peer; - tx->mxc_conn = conn; - /* no need to lock peer first since we already have a ref */ - mxlnd_conn_addref(conn); /* for the tx */ - txmsg = tx->mxc_msg; - txmsg->mxm_u.put_ack.mxpam_src_cookie = cookie; - txmsg->mxm_u.put_ack.mxpam_dst_cookie = tx->mxc_cookie; - tx->mxc_cookie = cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - /* we must post a receive _before_ sending the PUT_ACK */ - mxlnd_ctx_init(rx); - rx->mxc_state = MXLND_CTX_PREP; - rx->mxc_peer = peer; - rx->mxc_conn = conn; - /* do not take another ref for this rx, it is already taken */ - rx->mxc_nid = peer->mxp_nid; - ret = mxlnd_recv_data(ni, lntmsg, rx, MXLND_MSG_PUT_DATA, - txmsg->mxm_u.put_ack.mxpam_dst_cookie); - - if (unlikely(ret != 0)) { - /* Notify peer that it's over */ - CDEBUG(D_NETERROR, "Can't setup PUT_DATA rx for %s: %d\n", - libcfs_nid2str(nid), ret); - mxlnd_ctx_init(tx); - tx->mxc_state = MXLND_CTX_PREP; - tx->mxc_peer = peer; - tx->mxc_conn = conn; - /* finalize = 0, let the PUT_ACK tx finalize this */ - tx->mxc_lntmsg[0] = rx->mxc_lntmsg[0]; - tx->mxc_lntmsg[1] = rx->mxc_lntmsg[1]; - /* conn ref already taken above */ - mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, ret, cookie); - /* repost = 1 */ - break; - } - - mxlnd_queue_tx(tx); - /* do not return a credit until after PUT_DATA returns */ - repost = 0; - break; - - case MXLND_MSG_GET_REQ: - if (likely(lntmsg != NULL)) { - mxlnd_send_data(ni, lntmsg, rx->mxc_peer, MXLND_MSG_GET_DATA, - rx->mxc_msg->mxm_u.get_req.mxgrm_cookie); - } else { - /* GET didn't match anything */ - /* The initiator has a rx mapped to [k]iov. We cannot send a nak. - * We have to embed the error code in the match bits. 
- * Send the error in bits 52-59 and the cookie in bits 0-51 */ - u64 cookie = rxmsg->mxm_u.get_req.mxgrm_cookie; - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't get tx for GET NAK for %s\n", - libcfs_nid2str(nid)); - ret = -ENOMEM; - break; - } - tx->mxc_msg_type = MXLND_MSG_GET_DATA; - tx->mxc_state = MXLND_CTX_PENDING; - tx->mxc_nid = nid; - tx->mxc_peer = peer; - tx->mxc_conn = conn; - /* no need to lock peer first since we already have a ref */ - mxlnd_conn_addref(conn); /* for this tx */ - tx->mxc_cookie = cookie; - tx->mxc_match = mxlnd_create_match(tx, ENODATA); - tx->mxc_pin_type = MX_PIN_PHYSICAL; - mxlnd_queue_tx(tx); - } - /* finalize lntmsg after tx completes */ - break; - - default: - LBUG(); - } - - if (repost) { - /* we received a message, increment peer's outstanding credits */ - if (credit == 1) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding++; - spin_unlock(&conn->mxk_lock); - } - /* we are done with the rx */ - mxlnd_put_idle_rx(rx); - mxlnd_conn_decref(conn); - } - - if (finalize == 1) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg, 0); - - /* we received a credit, see if we can use it to send a msg */ - if (credit) mxlnd_check_sends(peer); - - return ret; -} - -void -mxlnd_sleep(unsigned long timeout) -{ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(timeout); - return; -} - -/** - * mxlnd_tx_queued - the generic send queue thread - * @arg - thread id (as a void *) - * - * This thread moves send messages from the global tx_queue to the owning - * peer's tx_[msg|data]_queue. If the peer does not exist, it creates one and adds - * it to the global peer list. 
- */ -int -mxlnd_tx_queued(void *arg) -{ - long id = (long) arg; - int ret = 0; - int found = 0; - struct kmx_ctx *tx = NULL; - struct kmx_peer *peer = NULL; - struct list_head *tmp_tx = NULL; - - cfs_daemonize("mxlnd_tx_queued"); - //cfs_block_allsigs(); - - while (!kmxlnd_data.kmx_shutdown) { - ret = down_interruptible(&kmxlnd_data.kmx_tx_queue_sem); - if (kmxlnd_data.kmx_shutdown) - break; - if (ret != 0) // Should we check for -EINTR? - continue; - spin_lock(&kmxlnd_data.kmx_tx_queue_lock); - if (list_empty (&kmxlnd_data.kmx_tx_queue)) { - spin_unlock(&kmxlnd_data.kmx_tx_queue_lock); - continue; - } - tmp_tx = &kmxlnd_data.kmx_tx_queue; - tx = list_entry (tmp_tx->next, struct kmx_ctx, mxc_list); - list_del_init(&tx->mxc_list); - spin_unlock(&kmxlnd_data.kmx_tx_queue_lock); - - found = 0; - peer = mxlnd_find_peer_by_nid(tx->mxc_nid); /* adds peer ref */ - if (peer != NULL) { - tx->mxc_peer = peer; - spin_lock(&peer->mxp_lock); - if (peer->mxp_conn == NULL) { - ret = mxlnd_conn_alloc_locked(&peer->mxp_conn, peer); - if (ret != 0) { - /* out of memory, give up and fail tx */ - tx->mxc_status.code = -ENOMEM; - spin_unlock(&peer->mxp_lock); - mxlnd_peer_decref(peer); - mxlnd_put_idle_tx(tx); - continue; - } - } - tx->mxc_conn = peer->mxp_conn; - mxlnd_conn_addref(tx->mxc_conn); /* for this tx */ - spin_unlock(&peer->mxp_lock); - mxlnd_peer_decref(peer); /* drop peer ref taken above */ - mxlnd_queue_tx(tx); - found = 1; - } - if (found == 0) { - int hash = 0; - struct kmx_peer *peer = NULL; - struct kmx_peer *old = NULL; - - hash = mxlnd_nid_to_hash(tx->mxc_nid); - - LASSERT(tx->mxc_msg_type != MXLND_MSG_PUT_DATA && - tx->mxc_msg_type != MXLND_MSG_GET_DATA); - /* create peer */ - /* adds conn ref for this function */ - ret = mxlnd_peer_alloc(&peer, tx->mxc_nid); - if (ret != 0) { - /* finalize message */ - tx->mxc_status.code = ret; - mxlnd_put_idle_tx(tx); - continue; - } - tx->mxc_peer = peer; - tx->mxc_conn = peer->mxp_conn; - /* this tx will keep the conn ref 
taken in peer_alloc() */ - - /* add peer to global peer list, but look to see - * if someone already created it after we released - * the read lock */ - write_lock(&kmxlnd_data.kmx_peers_lock); - list_for_each_entry(old, &kmxlnd_data.kmx_peers[hash], mxp_peers) { - if (old->mxp_nid == peer->mxp_nid) { - /* somebody beat us here, we created a duplicate */ - found = 1; - break; - } - } - - if (found == 0) { - list_add_tail(&peer->mxp_peers, &kmxlnd_data.kmx_peers[hash]); - atomic_inc(&kmxlnd_data.kmx_npeers); - } else { - tx->mxc_peer = old; - spin_lock(&old->mxp_lock); - tx->mxc_conn = old->mxp_conn; - /* FIXME can conn be NULL? */ - LASSERT(old->mxp_conn != NULL); - mxlnd_conn_addref(old->mxp_conn); - spin_unlock(&old->mxp_lock); - mxlnd_reduce_idle_rxs(*kmxlnd_tunables.kmx_credits - 1); - mxlnd_conn_decref(peer->mxp_conn); /* drop ref taken above.. */ - mxlnd_conn_decref(peer->mxp_conn); /* drop peer's ref */ - mxlnd_peer_decref(peer); - } - write_unlock(&kmxlnd_data.kmx_peers_lock); - - mxlnd_queue_tx(tx); - } - } - mxlnd_thread_stop(id); - return 0; -} - -/* When calling this, we must not have the peer lock. 
*/ -void -mxlnd_iconnect(struct kmx_peer *peer, u64 mask) -{ - mx_return_t mxret = MX_SUCCESS; - mx_request_t request; - struct kmx_conn *conn = peer->mxp_conn; - - /* NOTE we are holding a conn ref every time we call this function, - * we do not need to lock the peer before taking another ref */ - mxlnd_conn_addref(conn); /* hold until CONN_REQ or CONN_ACK completes */ - - LASSERT(mask == MXLND_MASK_ICON_REQ || - mask == MXLND_MASK_ICON_ACK); - - if (peer->mxp_reconnect_time == 0) { - peer->mxp_reconnect_time = jiffies; - } - - if (peer->mxp_nic_id == 0LL) { - mxlnd_peer_hostname_to_nic_id(peer); - if (peer->mxp_nic_id == 0LL) { - /* not mapped yet, return */ - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_INIT; - spin_unlock(&conn->mxk_lock); - if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) { - /* give up and notify LNET */ - mxlnd_conn_disconnect(conn, 0, 1); - mxlnd_conn_alloc(&peer->mxp_conn, peer); /* adds ref for this - function... */ - mxlnd_conn_decref(peer->mxp_conn); /* which we no - longer need */ - } - mxlnd_conn_decref(conn); - return; - } - } - - mxret = mx_iconnect(kmxlnd_data.kmx_endpt, peer->mxp_nic_id, - peer->mxp_host->mxh_ep_id, MXLND_MSG_MAGIC, mask, - (void *) peer, &request); - if (unlikely(mxret != MX_SUCCESS)) { - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - CDEBUG(D_NETERROR, "mx_iconnect() failed with %s (%d) to %s\n", - mx_strerror(mxret), mxret, libcfs_nid2str(peer->mxp_nid)); - mxlnd_conn_decref(conn); - } - return; -} - -#define MXLND_STATS 0 - -int -mxlnd_check_sends(struct kmx_peer *peer) -{ - int ret = 0; - int found = 0; - mx_return_t mxret = MX_SUCCESS; - struct kmx_ctx *tx = NULL; - struct kmx_conn *conn = NULL; - u8 msg_type = 0; - int credit = 0; - int status = 0; - int ntx_posted = 0; - int credits = 0; -#if MXLND_STATS - static unsigned long last = 0; -#endif - - if (unlikely(peer == NULL)) { - LASSERT(peer != NULL); - return -1; - 
} - spin_lock(&peer->mxp_lock); - conn = peer->mxp_conn; - /* NOTE take a ref for the duration of this function since it is called - * when there might not be any queued txs for this peer */ - if (conn) mxlnd_conn_addref(conn); /* for duration of this function */ - spin_unlock(&peer->mxp_lock); - - /* do not add another ref for this tx */ - - if (conn == NULL) { - /* we do not have any conns */ - return -1; - } - -#if MXLND_STATS - if (time_after(jiffies, last)) { - last = jiffies + HZ; - CDEBUG(D_NET, "status= %s credits= %d outstanding= %d ntx_msgs= %d " - "ntx_posted= %d ntx_data= %d data_posted= %d\n", - mxlnd_connstatus_to_str(conn->mxk_status), conn->mxk_credits, - conn->mxk_outstanding, conn->mxk_ntx_msgs, conn->mxk_ntx_posted, - conn->mxk_ntx_data, conn->mxk_data_posted); - } -#endif - - /* cache peer state for asserts */ - spin_lock(&conn->mxk_lock); - ntx_posted = conn->mxk_ntx_posted; - credits = conn->mxk_credits; - spin_unlock(&conn->mxk_lock); - - LASSERT(ntx_posted <= *kmxlnd_tunables.kmx_credits); - LASSERT(ntx_posted >= 0); - - LASSERT(credits <= *kmxlnd_tunables.kmx_credits); - LASSERT(credits >= 0); - - /* check number of queued msgs, ignore data */ - spin_lock(&conn->mxk_lock); - if (conn->mxk_outstanding >= MXLND_CREDIT_HIGHWATER) { - /* check if any txs queued that could return credits... 
*/ - if (list_empty(&conn->mxk_tx_credit_queue) || conn->mxk_ntx_msgs == 0) { - /* if not, send a NOOP */ - tx = mxlnd_get_idle_tx(); - if (likely(tx != NULL)) { - tx->mxc_peer = peer; - tx->mxc_conn = peer->mxp_conn; - mxlnd_conn_addref(conn); /* for this tx */ - mxlnd_init_tx_msg (tx, MXLND_MSG_NOOP, 0, peer->mxp_nid); - tx->mxc_match = mxlnd_create_match(tx, 0); - mxlnd_peer_queue_tx_locked(tx); - found = 1; - goto done_locked; - } - } - } - spin_unlock(&conn->mxk_lock); - - /* if the peer is not ready, try to connect */ - spin_lock(&conn->mxk_lock); - if (unlikely(conn->mxk_status == MXLND_CONN_INIT || - conn->mxk_status == MXLND_CONN_FAIL || - conn->mxk_status == MXLND_CONN_REQ)) { - CDEBUG(D_NET, "status=%s\n", mxlnd_connstatus_to_str(conn->mxk_status)); - conn->mxk_status = MXLND_CONN_WAIT; - spin_unlock(&conn->mxk_lock); - mxlnd_iconnect(peer, MXLND_MASK_ICON_REQ); - goto done; - } - spin_unlock(&conn->mxk_lock); - - spin_lock(&conn->mxk_lock); - while (!list_empty(&conn->mxk_tx_free_queue) || - !list_empty(&conn->mxk_tx_credit_queue)) { - /* We have something to send. If we have a queued tx that does not - * require a credit (free), choose it since its completion will - * return a credit (here or at the peer), complete a DATA or - * CONN_REQ or CONN_ACK. 
*/ - struct list_head *tmp_tx = NULL; - if (!list_empty(&conn->mxk_tx_free_queue)) { - tmp_tx = &conn->mxk_tx_free_queue; - } else { - tmp_tx = &conn->mxk_tx_credit_queue; - } - tx = list_entry(tmp_tx->next, struct kmx_ctx, mxc_list); - - msg_type = tx->mxc_msg_type; - - /* don't try to send a rx */ - LASSERT(tx->mxc_type == MXLND_REQ_TX); - - /* ensure that it is a valid msg type */ - LASSERT(msg_type == MXLND_MSG_CONN_REQ || - msg_type == MXLND_MSG_CONN_ACK || - msg_type == MXLND_MSG_NOOP || - msg_type == MXLND_MSG_EAGER || - msg_type == MXLND_MSG_PUT_REQ || - msg_type == MXLND_MSG_PUT_ACK || - msg_type == MXLND_MSG_PUT_DATA || - msg_type == MXLND_MSG_GET_REQ || - msg_type == MXLND_MSG_GET_DATA); - LASSERT(tx->mxc_peer == peer); - LASSERT(tx->mxc_nid == peer->mxp_nid); - - credit = mxlnd_tx_requires_credit(tx); - if (credit) { - - if (conn->mxk_ntx_posted == *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(peer->mxp_nid)); - goto done_locked; - } - - if (conn->mxk_credits == 0) { - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(peer->mxp_nid)); - goto done_locked; - } - - if (conn->mxk_credits == 1 && /* last credit reserved for */ - conn->mxk_outstanding == 0) { /* giving back credits */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(peer->mxp_nid)); - goto done_locked; - } - } - - if (unlikely(conn->mxk_status != MXLND_CONN_READY)) { - if ( ! 
(msg_type == MXLND_MSG_CONN_REQ || - msg_type == MXLND_MSG_CONN_ACK)) { - CDEBUG(D_NET, "peer status is %s for tx 0x%llx (%s)\n", - mxlnd_connstatus_to_str(conn->mxk_status), - tx->mxc_cookie, - mxlnd_msgtype_to_str(tx->mxc_msg_type)); - if (conn->mxk_status == MXLND_CONN_DISCONNECT) { - list_del_init(&tx->mxc_list); - tx->mxc_status.code = -ECONNABORTED; - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - } - goto done_locked; - } - } - - list_del_init(&tx->mxc_list); - - /* handle credits, etc now while we have the lock to avoid races */ - if (credit) { - conn->mxk_credits--; - conn->mxk_ntx_posted++; - } - if (msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA) { - if (msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - conn->mxk_ntx_msgs--; - } - } - if (tx->mxc_incarnation == 0 && - conn->mxk_incarnation != 0) { - tx->mxc_incarnation = conn->mxk_incarnation; - } - spin_unlock(&conn->mxk_lock); - - /* if this is a NOOP and (1) mxp_conn->mxk_outstanding < CREDIT_HIGHWATER - * or (2) there is a non-DATA msg that can return credits in the - * queue, then drop this duplicate NOOP */ - if (unlikely(msg_type == MXLND_MSG_NOOP)) { - spin_lock(&conn->mxk_lock); - if ((conn->mxk_outstanding < MXLND_CREDIT_HIGHWATER) || - (conn->mxk_ntx_msgs >= 1)) { - conn->mxk_credits++; - conn->mxk_ntx_posted--; - spin_unlock(&conn->mxk_lock); - /* redundant NOOP */ - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(peer->mxp_nid)); - found = 1; - goto done; - } - spin_unlock(&conn->mxk_lock); - } - - found = 1; - if (likely((msg_type != MXLND_MSG_PUT_DATA) && - (msg_type != MXLND_MSG_GET_DATA))) { - mxlnd_pack_msg(tx); - } - - //ret = -ECONNABORTED; - mxret = MX_SUCCESS; - - spin_lock(&conn->mxk_lock); - status = conn->mxk_status; - spin_unlock(&conn->mxk_lock); - - if (likely((status == MXLND_CONN_READY) || - (msg_type == MXLND_MSG_CONN_REQ) || - (msg_type == MXLND_MSG_CONN_ACK))) { - ret 
= 0; - if (msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - /* add to the pending list */ - ret = mxlnd_q_pending_ctx(tx); - if (ret == -1) { - /* FIXME the conn is disconnected, now what? */ - } - } else { - /* CONN_REQ/ACK */ - tx->mxc_state = MXLND_CTX_PENDING; - } - - if (ret == 0) { - if (likely(msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA)) { - /* send a msg style tx */ - LASSERT(tx->mxc_nseg == 1); - LASSERT(tx->mxc_pin_type == MX_PIN_PHYSICAL); - CDEBUG(D_NET, "sending %s 0x%llx\n", - mxlnd_msgtype_to_str(msg_type), - tx->mxc_cookie); - mxret = mx_kisend(kmxlnd_data.kmx_endpt, - &tx->mxc_seg, - tx->mxc_nseg, - tx->mxc_pin_type, - conn->mxk_epa, - tx->mxc_match, - (void *) tx, - &tx->mxc_mxreq); - } else { - /* send a DATA tx */ - spin_lock(&conn->mxk_lock); - conn->mxk_ntx_data--; - conn->mxk_data_posted++; - spin_unlock(&conn->mxk_lock); - CDEBUG(D_NET, "sending %s 0x%llx\n", - mxlnd_msgtype_to_str(msg_type), - tx->mxc_cookie); - mxret = mx_kisend(kmxlnd_data.kmx_endpt, - tx->mxc_seg_list, - tx->mxc_nseg, - tx->mxc_pin_type, - conn->mxk_epa, - tx->mxc_match, - (void *) tx, - &tx->mxc_mxreq); - } - } else { - mxret = MX_CONNECTION_FAILED; - } - if (likely(mxret == MX_SUCCESS)) { - ret = 0; - } else { - CDEBUG(D_NETERROR, "mx_kisend() failed with %s (%d) " - "sending to %s\n", mx_strerror(mxret), (int) mxret, - libcfs_nid2str(peer->mxp_nid)); - /* NOTE mx_kisend() only fails if there are not enough - * resources. Do not change the connection status. 
*/ - if (mxret == MX_NO_RESOURCES) { - tx->mxc_status.code = -ENOMEM; - } else { - tx->mxc_status.code = -ECONNABORTED; - } - if (credit) { - spin_lock(&conn->mxk_lock); - conn->mxk_ntx_posted--; - conn->mxk_credits++; - spin_unlock(&conn->mxk_lock); - } else if (msg_type == MXLND_MSG_PUT_DATA || - msg_type == MXLND_MSG_GET_DATA) { - spin_lock(&conn->mxk_lock); - conn->mxk_data_posted--; - spin_unlock(&conn->mxk_lock); - } - if (msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA && - msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding += tx->mxc_msg->mxm_credits; - spin_unlock(&conn->mxk_lock); - } - if (msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - /* remove from the pending list */ - mxlnd_deq_pending_ctx(tx); - } - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - } - } - spin_lock(&conn->mxk_lock); - } -done_locked: - spin_unlock(&conn->mxk_lock); -done: - mxlnd_conn_decref(conn); /* drop ref taken at start of function */ - return found; -} - - -/** - * mxlnd_handle_tx_completion - a tx completed, progress or complete the msg - * @ctx - the tx descriptor - * - * Determine which type of send request it was and start the next step, if needed, - * or, if done, signal completion to LNET. After we are done, put back on the - * idle tx list. 
- */ -void -mxlnd_handle_tx_completion(struct kmx_ctx *tx) -{ - int failed = (tx->mxc_status.code != MX_STATUS_SUCCESS); - struct kmx_msg *msg = tx->mxc_msg; - struct kmx_peer *peer = tx->mxc_peer; - struct kmx_conn *conn = tx->mxc_conn; - u8 type = tx->mxc_msg_type; - int credit = mxlnd_tx_requires_credit(tx); - u64 cookie = tx->mxc_cookie; - - CDEBUG(D_NET, "entering %s (0x%llx):\n", - mxlnd_msgtype_to_str(tx->mxc_msg_type), cookie); - - if (unlikely(conn == NULL)) { - mx_get_endpoint_addr_context(tx->mxc_status.source, (void **) &peer); - conn = peer->mxp_conn; - if (conn != NULL) { - /* do not add a ref for the tx, it was set before sending */ - tx->mxc_conn = conn; - tx->mxc_peer = conn->mxk_peer; - } - } - LASSERT (peer != NULL); - LASSERT (conn != NULL); - - if (type != MXLND_MSG_PUT_DATA && type != MXLND_MSG_GET_DATA) { - LASSERT (type == msg->mxm_type); - } - - if (failed) { - tx->mxc_status.code = -EIO; - } else { - spin_lock(&conn->mxk_lock); - conn->mxk_last_tx = jiffies; - spin_unlock(&conn->mxk_lock); - } - - switch (type) { - - case MXLND_MSG_GET_DATA: - spin_lock(&conn->mxk_lock); - if (conn->mxk_incarnation == tx->mxc_incarnation) { - conn->mxk_outstanding++; - conn->mxk_data_posted--; - } - spin_unlock(&conn->mxk_lock); - break; - - case MXLND_MSG_PUT_DATA: - spin_lock(&conn->mxk_lock); - if (conn->mxk_incarnation == tx->mxc_incarnation) { - conn->mxk_data_posted--; - } - spin_unlock(&conn->mxk_lock); - break; - - case MXLND_MSG_NOOP: - case MXLND_MSG_PUT_REQ: - case MXLND_MSG_PUT_ACK: - case MXLND_MSG_GET_REQ: - case MXLND_MSG_EAGER: - //case MXLND_MSG_NAK: - break; - - case MXLND_MSG_CONN_ACK: - if (peer->mxp_incompatible) { - /* we sent our params, now close this conn */ - mxlnd_conn_disconnect(conn, 0, 1); - } - case MXLND_MSG_CONN_REQ: - if (failed) { - CDEBUG(D_NETERROR, "handle_tx_completion(): %s " - "failed with %s (%d) to %s\n", - type == MXLND_MSG_CONN_REQ ? 
"CONN_REQ" : "CONN_ACK", - mx_strstatus(tx->mxc_status.code), - tx->mxc_status.code, - libcfs_nid2str(tx->mxc_nid)); - if (!peer->mxp_incompatible) { - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - } - } - break; - - default: - CDEBUG(D_NETERROR, "Unknown msg type of %d\n", type); - LBUG(); - } - - if (credit) { - spin_lock(&conn->mxk_lock); - if (conn->mxk_incarnation == tx->mxc_incarnation) { - conn->mxk_ntx_posted--; - } - spin_unlock(&conn->mxk_lock); - } - - CDEBUG(D_NET, "leaving mxlnd_handle_tx_completion()\n"); - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - - mxlnd_check_sends(peer); - - return; -} - -void -mxlnd_handle_rx_completion(struct kmx_ctx *rx) -{ - int ret = 0; - int repost = 1; - int credit = 1; - u32 nob = rx->mxc_status.xfer_length; - u64 bits = rx->mxc_status.match_info; - struct kmx_msg *msg = rx->mxc_msg; - struct kmx_peer *peer = rx->mxc_peer; - struct kmx_conn *conn = rx->mxc_conn; - u8 type = rx->mxc_msg_type; - u64 seq = 0LL; - lnet_msg_t *lntmsg[2]; - int result = 0; - u64 nic_id = 0LL; - u32 ep_id = 0; - int peer_ref = 0; - int conn_ref = 0; - int incompatible = 0; - - /* NOTE We may only know the peer's nid if it is a PUT_REQ, GET_REQ, - * failed GET reply, CONN_REQ, or a CONN_ACK */ - - /* NOTE peer may still be NULL if it is a new peer and - * conn may be NULL if this is a re-connect */ - if (likely(peer != NULL && conn != NULL)) { - /* we have a reference on the conn */ - conn_ref = 1; - } else if (peer != NULL && conn == NULL) { - /* we have a reference on the peer */ - peer_ref = 1; - } else if (peer == NULL && conn != NULL) { - /* fatal error */ - CDEBUG(D_NETERROR, "rx has conn but no peer\n"); - LBUG(); - } /* else peer and conn == NULL */ - -#if 0 - if (peer == NULL || conn == NULL) { - /* if the peer was disconnected, the peer may exist but - * not have any valid conns */ - decref = 0; /* no peer means no ref was taken for this rx */ - } -#endif - - if (conn == 
NULL && peer != NULL) { - spin_lock(&peer->mxp_lock); - conn = peer->mxp_conn; - if (conn) { - mxlnd_conn_addref(conn); /* conn takes ref... */ - mxlnd_peer_decref(peer); /* from peer */ - conn_ref = 1; - peer_ref = 0; - } - spin_unlock(&peer->mxp_lock); - rx->mxc_conn = conn; - } - -#if MXLND_DEBUG - CDEBUG(D_NET, "receiving msg bits=0x%llx nob=%d peer=0x%p\n", bits, nob, peer); -#endif - - lntmsg[0] = NULL; - lntmsg[1] = NULL; - - if (rx->mxc_status.code != MX_STATUS_SUCCESS) { - CDEBUG(D_NETERROR, "rx from %s failed with %s (%d)\n", - libcfs_nid2str(rx->mxc_nid), - mx_strstatus(rx->mxc_status.code), - (int) rx->mxc_status.code); - credit = 0; - goto cleanup; - } - - if (nob == 0) { - /* this may be a failed GET reply */ - if (type == MXLND_MSG_GET_DATA) { - bits = rx->mxc_status.match_info & 0x0FF0000000000000LL; - ret = (u32) (bits>>52); - lntmsg[0] = rx->mxc_lntmsg[0]; - result = -ret; - goto cleanup; - } else { - /* we had a rx complete with 0 bytes (no hdr, nothing) */ - CDEBUG(D_NETERROR, "rx from %s returned with 0 bytes\n", - libcfs_nid2str(rx->mxc_nid)); - goto cleanup; - } - } - - /* NOTE PUT_DATA and GET_DATA do not have mxc_msg, do not call unpack() */ - if (type == MXLND_MSG_PUT_DATA) { - result = rx->mxc_status.code; - lntmsg[0] = rx->mxc_lntmsg[0]; - goto cleanup; - } else if (type == MXLND_MSG_GET_DATA) { - result = rx->mxc_status.code; - lntmsg[0] = rx->mxc_lntmsg[0]; - lntmsg[1] = rx->mxc_lntmsg[1]; - goto cleanup; - } - - ret = mxlnd_unpack_msg(msg, nob); - if (ret != 0) { - CDEBUG(D_NETERROR, "Error %d unpacking rx from %s\n", - ret, libcfs_nid2str(rx->mxc_nid)); - goto cleanup; - } - rx->mxc_nob = nob; - type = msg->mxm_type; - seq = msg->mxm_seq; - - if (type != MXLND_MSG_CONN_REQ && - (!lnet_ptlcompat_matchnid(rx->mxc_nid, msg->mxm_srcnid) || - !lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid))) { - CDEBUG(D_NETERROR, "rx with mismatched NID (type %s) (my nid is " - "0x%llx and rx msg dst is 0x%llx)\n", - 
mxlnd_msgtype_to_str(type), kmxlnd_data.kmx_ni->ni_nid, - msg->mxm_dstnid); - goto cleanup; - } - - if (type != MXLND_MSG_CONN_REQ && type != MXLND_MSG_CONN_ACK) { - if ((conn != NULL && msg->mxm_srcstamp != conn->mxk_incarnation) || - msg->mxm_dststamp != kmxlnd_data.kmx_incarnation) { - if (conn != NULL) { - CDEBUG(D_NETERROR, "Stale rx from %s with type %s " - "(mxm_srcstamp (%lld) != mxk_incarnation (%lld) " - "|| mxm_dststamp (%lld) != kmx_incarnation (%lld))\n", - libcfs_nid2str(rx->mxc_nid), mxlnd_msgtype_to_str(type), - msg->mxm_srcstamp, conn->mxk_incarnation, - msg->mxm_dststamp, kmxlnd_data.kmx_incarnation); - } else { - CDEBUG(D_NETERROR, "Stale rx from %s with type %s " - "mxm_dststamp (%lld) != kmx_incarnation (%lld))\n", - libcfs_nid2str(rx->mxc_nid), mxlnd_msgtype_to_str(type), - msg->mxm_dststamp, kmxlnd_data.kmx_incarnation); - } - credit = 0; - goto cleanup; - } - } - - CDEBUG(D_NET, "Received %s with %d credits\n", - mxlnd_msgtype_to_str(type), msg->mxm_credits); - - if (msg->mxm_type != MXLND_MSG_CONN_REQ && - msg->mxm_type != MXLND_MSG_CONN_ACK) { - LASSERT(peer != NULL); - LASSERT(conn != NULL); - if (msg->mxm_credits != 0) { - spin_lock(&conn->mxk_lock); - if (msg->mxm_srcstamp == conn->mxk_incarnation) { - if ((conn->mxk_credits + msg->mxm_credits) > - *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NETERROR, "mxk_credits %d mxm_credits %d\n", - conn->mxk_credits, msg->mxm_credits); - } - conn->mxk_credits += msg->mxm_credits; - LASSERT(conn->mxk_credits >= 0); - LASSERT(conn->mxk_credits <= *kmxlnd_tunables.kmx_credits); - } - spin_unlock(&conn->mxk_lock); - } - } - - CDEBUG(D_NET, "switch %s for rx (0x%llx)\n", mxlnd_msgtype_to_str(type), seq); - switch (type) { - case MXLND_MSG_NOOP: - break; - - case MXLND_MSG_EAGER: - ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.eager.mxem_hdr, - msg->mxm_srcnid, rx, 0); - repost = ret < 0; - break; - - case MXLND_MSG_PUT_REQ: - ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.put_req.mxprm_hdr, - 
msg->mxm_srcnid, rx, 1); - repost = ret < 0; - break; - - case MXLND_MSG_PUT_ACK: { - u64 cookie = (u64) msg->mxm_u.put_ack.mxpam_dst_cookie; - if (cookie > MXLND_MAX_COOKIE) { - CDEBUG(D_NETERROR, "NAK for msg_type %d from %s\n", rx->mxc_msg_type, - libcfs_nid2str(rx->mxc_nid)); - result = -((cookie >> 52) & 0xff); - lntmsg[0] = rx->mxc_lntmsg[0]; - } else { - mxlnd_send_data(kmxlnd_data.kmx_ni, rx->mxc_lntmsg[0], - rx->mxc_peer, MXLND_MSG_PUT_DATA, - rx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie); - } - /* repost == 1 */ - break; - } - case MXLND_MSG_GET_REQ: - ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.get_req.mxgrm_hdr, - msg->mxm_srcnid, rx, 1); - repost = ret < 0; - break; - - case MXLND_MSG_CONN_REQ: - if (!lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid)) { - CDEBUG(D_NETERROR, "Can't accept %s: bad dst nid %s\n", - libcfs_nid2str(msg->mxm_srcnid), - libcfs_nid2str(msg->mxm_dstnid)); - goto cleanup; - } - if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NETERROR, "Can't accept %s: incompatible queue depth " - "%d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_queue_depth, - *kmxlnd_tunables.kmx_credits); - incompatible = 1; - } - if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_EAGER_SIZE) { - CDEBUG(D_NETERROR, "Can't accept %s: incompatible EAGER size " - "%d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_eager_size, - (int) MXLND_EAGER_SIZE); - incompatible = 1; - } - if (peer == NULL) { - peer = mxlnd_find_peer_by_nid(msg->mxm_srcnid); /* adds peer ref */ - if (peer == NULL) { - int hash = 0; - struct kmx_peer *existing_peer = NULL; - hash = mxlnd_nid_to_hash(msg->mxm_srcnid); - - mx_decompose_endpoint_addr(rx->mxc_status.source, - &nic_id, &ep_id); - rx->mxc_nid = msg->mxm_srcnid; - - /* adds conn ref for peer and one for this function */ - ret = mxlnd_peer_alloc(&peer, msg->mxm_srcnid); - if (ret != 0) { - goto cleanup; - } - 
LASSERT(peer->mxp_host->mxh_ep_id == ep_id); - write_lock(&kmxlnd_data.kmx_peers_lock); - existing_peer = mxlnd_find_peer_by_nid_locked(msg->mxm_srcnid); - if (existing_peer) { - mxlnd_conn_decref(peer->mxp_conn); - mxlnd_peer_decref(peer); - peer = existing_peer; - mxlnd_conn_addref(peer->mxp_conn); - } else { - list_add_tail(&peer->mxp_peers, - &kmxlnd_data.kmx_peers[hash]); - write_unlock(&kmxlnd_data.kmx_peers_lock); - atomic_inc(&kmxlnd_data.kmx_npeers); - } - } else { - ret = mxlnd_conn_alloc(&conn, peer); /* adds 2nd ref */ - mxlnd_peer_decref(peer); /* drop ref taken above */ - if (ret != 0) { - CDEBUG(D_NETERROR, "Cannot allocate mxp_conn\n"); - goto cleanup; - } - } - conn_ref = 1; /* peer/conn_alloc() added ref for this function */ - conn = peer->mxp_conn; - } else { - struct kmx_conn *old_conn = conn; - - /* do not call mx_disconnect() */ - mxlnd_conn_disconnect(old_conn, 0, 0); - - /* the ref for this rx was taken on the old_conn */ - mxlnd_conn_decref(old_conn); - - /* This allocs a conn, points peer->mxp_conn to this one. - * The old conn is still on the peer->mxp_conns list. - * As the pending requests complete, they will call - * conn_decref() which will eventually free it. 
*/ - ret = mxlnd_conn_alloc(&conn, peer); - if (ret != 0) { - CDEBUG(D_NETERROR, "Cannot allocate peer->mxp_conn\n"); - goto cleanup; - } - /* conn_alloc() adds one ref for the peer and one for this function */ - conn_ref = 1; - } - spin_lock(&peer->mxp_lock); - peer->mxp_incarnation = msg->mxm_srcstamp; - peer->mxp_incompatible = incompatible; - spin_unlock(&peer->mxp_lock); - spin_lock(&conn->mxk_lock); - conn->mxk_incarnation = msg->mxm_srcstamp; - conn->mxk_status = MXLND_CONN_WAIT; - spin_unlock(&conn->mxk_lock); - - /* handle_conn_ack() will create the CONN_ACK msg */ - mxlnd_iconnect(peer, MXLND_MASK_ICON_ACK); - - break; - - case MXLND_MSG_CONN_ACK: - if (!lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid)) { - CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: " - "bad dst nid %s\n", libcfs_nid2str(msg->mxm_srcnid), - libcfs_nid2str(msg->mxm_dstnid)); - ret = -1; - goto failed; - } - if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: " - "incompatible queue depth %d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_queue_depth, - *kmxlnd_tunables.kmx_credits); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - incompatible = 1; - ret = -1; - } - if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_EAGER_SIZE) { - CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: " - "incompatible EAGER size %d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_eager_size, - (int) MXLND_EAGER_SIZE); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - incompatible = 1; - ret = -1; - } - spin_lock(&peer->mxp_lock); - peer->mxp_incarnation = msg->mxm_srcstamp; - peer->mxp_incompatible = incompatible; - spin_unlock(&peer->mxp_lock); - spin_lock(&conn->mxk_lock); - conn->mxk_credits = *kmxlnd_tunables.kmx_credits; - 
conn->mxk_outstanding = 0; - conn->mxk_incarnation = msg->mxm_srcstamp; - conn->mxk_timeout = 0; - if (!incompatible) { - conn->mxk_status = MXLND_CONN_READY; - } - spin_unlock(&conn->mxk_lock); - if (incompatible) mxlnd_conn_disconnect(conn, 0, 1); - break; - - default: - CDEBUG(D_NETERROR, "Bad MXLND message type %x from %s\n", msg->mxm_type, - libcfs_nid2str(rx->mxc_nid)); - ret = -EPROTO; - break; - } - -failed: - if (ret < 0) { - MXLND_PRINT("setting PEER_CONN_FAILED\n"); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - } - -cleanup: - if (conn != NULL) { - spin_lock(&conn->mxk_lock); - conn->mxk_last_rx = cfs_time_current(); /* jiffies */ - spin_unlock(&conn->mxk_lock); - } - - if (repost) { - /* lnet_parse() failed, etc., repost now */ - mxlnd_put_idle_rx(rx); - if (conn != NULL && credit == 1) { - if (type == MXLND_MSG_PUT_DATA) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding++; - spin_unlock(&conn->mxk_lock); - } else if (type != MXLND_MSG_GET_DATA && - (type == MXLND_MSG_EAGER || - type == MXLND_MSG_PUT_REQ || - type == MXLND_MSG_NOOP)) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding++; - spin_unlock(&conn->mxk_lock); - } - } - if (conn_ref) mxlnd_conn_decref(conn); - LASSERT(peer_ref == 0); - } - - if (type == MXLND_MSG_PUT_DATA || type == MXLND_MSG_GET_DATA) { - CDEBUG(D_NET, "leaving for rx (0x%llx)\n", bits); - } else { - CDEBUG(D_NET, "leaving for rx (0x%llx)\n", seq); - } - - if (lntmsg[0] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result); - if (lntmsg[1] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result); - - if (conn != NULL && credit == 1) mxlnd_check_sends(peer); - - return; -} - - - -void -mxlnd_handle_conn_req(struct kmx_peer *peer, mx_status_t status) -{ - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_conn *conn = peer->mxp_conn; - - /* a conn ref was taken when calling mx_iconnect(), - * hold it until CONN_REQ or CONN_ACK 
completes */ - - CDEBUG(D_NET, "entering\n"); - if (status.code != MX_STATUS_SUCCESS) { - CDEBUG(D_NETERROR, "mx_iconnect() failed with %s (%d) to %s\n", - mx_strstatus(status.code), status.code, - libcfs_nid2str(peer->mxp_nid)); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - - if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) { - struct kmx_conn *new_conn = NULL; - CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n"); - mxlnd_conn_disconnect(conn, 0, 1); - mxlnd_conn_alloc(&new_conn, peer); /* adds a ref for this function */ - mxlnd_conn_decref(new_conn); /* which we no longer need */ - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - } - - mxlnd_conn_decref(conn); - return; - } - - spin_lock(&conn->mxk_lock); - conn->mxk_epa = status.source; - spin_unlock(&conn->mxk_lock); - /* NOTE we are holding a ref on the conn which has a ref on the peer, - * we should not need to lock the peer */ - mx_set_endpoint_addr_context(conn->mxk_epa, (void *) peer); - - /* mx_iconnect() succeeded, reset delay to 0 */ - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - - /* marshal CONN_REQ msg */ - /* we are still using the conn ref from iconnect() - do not take another */ - tx = mxlnd_get_idle_tx(); - if (tx == NULL) { - CDEBUG(D_NETERROR, "Can't allocate CONN_REQ tx for %s\n", - libcfs_nid2str(peer->mxp_nid)); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - mxlnd_conn_decref(conn); - return; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; - mxlnd_init_tx_msg (tx, MXLND_MSG_CONN_REQ, sizeof(kmx_connreq_msg_t), peer->mxp_nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.conn_req.mxcrm_queue_depth = *kmxlnd_tunables.kmx_credits; - txmsg->mxm_u.conn_req.mxcrm_eager_size = MXLND_EAGER_SIZE; - tx->mxc_match = mxlnd_create_match(tx, 0); - - CDEBUG(D_NET, "sending 
MXLND_MSG_CONN_REQ\n"); - mxlnd_queue_tx(tx); - return; -} - -void -mxlnd_handle_conn_ack(struct kmx_peer *peer, mx_status_t status) -{ - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_conn *conn = peer->mxp_conn; - - /* a conn ref was taken when calling mx_iconnect(), - * hold it until CONN_REQ or CONN_ACK completes */ - - CDEBUG(D_NET, "entering\n"); - if (status.code != MX_STATUS_SUCCESS) { - CDEBUG(D_NETERROR, "mx_iconnect() failed for CONN_ACK with %s (%d) " - "to %s mxp_nid = 0x%llx mxp_nic_id = 0x%0llx mxh_ep_id = %d\n", - mx_strstatus(status.code), status.code, - libcfs_nid2str(peer->mxp_nid), - peer->mxp_nid, - peer->mxp_nic_id, - peer->mxp_host->mxh_ep_id); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - - if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) { - struct kmx_conn *new_conn = NULL; - CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n"); - mxlnd_conn_disconnect(conn, 0, 1); - mxlnd_conn_alloc(&new_conn, peer); /* adds ref for - this function... 
*/ - mxlnd_conn_decref(new_conn); /* which we no longer need */ - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - } - - mxlnd_conn_decref(conn); - return; - } - spin_lock(&conn->mxk_lock); - conn->mxk_epa = status.source; - if (likely(!peer->mxp_incompatible)) { - conn->mxk_status = MXLND_CONN_READY; - } - spin_unlock(&conn->mxk_lock); - /* NOTE we are holding a ref on the conn which has a ref on the peer, - * we should not have to lock the peer */ - mx_set_endpoint_addr_context(conn->mxk_epa, (void *) peer); - - /* mx_iconnect() succeeded, reset delay to 0 */ - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - - /* marshal CONN_ACK msg */ - tx = mxlnd_get_idle_tx(); - if (tx == NULL) { - CDEBUG(D_NETERROR, "Can't allocate CONN_ACK tx for %s\n", - libcfs_nid2str(peer->mxp_nid)); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - mxlnd_conn_decref(conn); - return; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; - CDEBUG(D_NET, "sending MXLND_MSG_CONN_ACK\n"); - mxlnd_init_tx_msg (tx, MXLND_MSG_CONN_ACK, sizeof(kmx_connreq_msg_t), peer->mxp_nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.conn_req.mxcrm_queue_depth = *kmxlnd_tunables.kmx_credits; - txmsg->mxm_u.conn_req.mxcrm_eager_size = MXLND_EAGER_SIZE; - tx->mxc_match = mxlnd_create_match(tx, 0); - - mxlnd_queue_tx(tx); - return; -} - -/** - * mxlnd_request_waitd - the MX request completion thread(s) - * @arg - thread id (as a void *) - * - * This thread waits for a MX completion and then completes the request. - * We will create one thread per CPU. 
- */ -int -mxlnd_request_waitd(void *arg) -{ - long id = (long) arg; - char name[24]; - __u32 result = 0; - mx_return_t mxret = MX_SUCCESS; - mx_status_t status; - struct kmx_ctx *ctx = NULL; - enum kmx_req_state req_type = MXLND_REQ_TX; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; -#if MXLND_POLLING - int count = 0; -#endif - - memset(name, 0, sizeof(name)); - snprintf(name, sizeof(name), "mxlnd_request_waitd_%02ld", id); - cfs_daemonize(name); - //cfs_block_allsigs(); - - memset(&status, 0, sizeof(status)); - - CDEBUG(D_NET, "%s starting\n", name); - - while (!kmxlnd_data.kmx_shutdown) { - mxret = MX_SUCCESS; - result = 0; -#if MXLND_POLLING - if (id == 0 && count++ < *kmxlnd_tunables.kmx_polling) { - mxret = mx_test_any(kmxlnd_data.kmx_endpt, 0LL, 0LL, - &status, &result); - } else { - count = 0; - mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT, - 0LL, 0LL, &status, &result); - } -#else - mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT, - 0LL, 0LL, &status, &result); -#endif - if (unlikely(kmxlnd_data.kmx_shutdown)) - break; - - if (result != 1) { - /* nothing completed... */ - continue; - } - - if (status.code != MX_STATUS_SUCCESS) { - CDEBUG(D_NETERROR, "wait_any() failed with %s (%d) with " - "match_info 0x%llx and length %d\n", - mx_strstatus(status.code), status.code, - (u64) status.match_info, status.msg_length); - } - - /* This may be a mx_iconnect() request completing, - * check the bit mask for CONN_REQ and CONN_ACK */ - if (status.match_info == MXLND_MASK_ICON_REQ || - status.match_info == MXLND_MASK_ICON_ACK) { - peer = (struct kmx_peer*) status.context; - if (status.match_info == MXLND_MASK_ICON_REQ) { - mxlnd_handle_conn_req(peer, status); - } else { - mxlnd_handle_conn_ack(peer, status); - } - continue; - } - - /* This must be a tx or rx */ - - /* NOTE: if this is a RX from the unexpected callback, it may - * have very little info. If we dropped it in unexpected_recv(), - * it will not have a context. 
If so, ignore it. */ - ctx = (struct kmx_ctx *) status.context; - if (ctx != NULL) { - - req_type = ctx->mxc_type; - conn = ctx->mxc_conn; /* this may be NULL */ - mxlnd_deq_pending_ctx(ctx); - - /* copy status to ctx->mxc_status */ - memcpy(&ctx->mxc_status, &status, sizeof(status)); - - switch (req_type) { - case MXLND_REQ_TX: - mxlnd_handle_tx_completion(ctx); - break; - case MXLND_REQ_RX: - mxlnd_handle_rx_completion(ctx); - break; - default: - CDEBUG(D_NETERROR, "Unknown ctx type %d\n", req_type); - LBUG(); - break; - } - - /* FIXME may need to reconsider this */ - /* conn is always set except for the first CONN_REQ rx - * from a new peer */ - if (!(status.code == MX_STATUS_SUCCESS || - status.code == MX_STATUS_TRUNCATED) && - conn != NULL) { - mxlnd_conn_disconnect(conn, 1, 1); - } - } - CDEBUG(D_NET, "waitd() completed task\n"); - } - CDEBUG(D_NET, "%s stopping\n", name); - mxlnd_thread_stop(id); - return 0; -} - - -unsigned long -mxlnd_check_timeouts(unsigned long now) -{ - int i = 0; - int disconnect = 0; - unsigned long next = 0; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - - if (unlikely(kmxlnd_data.kmx_shutdown)) { - read_unlock(&kmxlnd_data.kmx_peers_lock); - return next; - } - - spin_lock(&peer->mxp_lock); - conn = peer->mxp_conn; - if (conn) { - mxlnd_conn_addref(conn); - spin_unlock(&peer->mxp_lock); - } else { - spin_unlock(&peer->mxp_lock); - continue; - } - - spin_lock(&conn->mxk_lock); - - /* if nothing pending (timeout == 0) or - * if conn is already disconnected, - * skip this conn */ - if (conn->mxk_timeout == 0 || - conn->mxk_status == MXLND_CONN_DISCONNECT) { - spin_unlock(&conn->mxk_lock); - mxlnd_conn_decref(conn); - continue; - } - - /* we want to find the timeout that will occur first. - * if it is in the future, we will sleep until then. 
- * if it is in the past, then we will sleep one - * second and repeat the process. */ - if ((next == 0) || (conn->mxk_timeout < next)) { - next = conn->mxk_timeout; - } - - disconnect = 0; - - if (time_after_eq(now, conn->mxk_timeout)) { - disconnect = 1; - } - spin_unlock(&conn->mxk_lock); - - if (disconnect) { - mxlnd_conn_disconnect(conn, 1, 1); - } - mxlnd_conn_decref(conn); - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - if (next == 0) next = now + MXLND_COMM_TIMEOUT; - - return next; -} - -/** - * mxlnd_timeoutd - enforces timeouts on messages - * @arg - thread id (as a void *) - * - * This thread queries each peer for its earliest timeout. If a peer has timed out, - * it calls mxlnd_conn_disconnect(). - * - * After checking for timeouts, try progressing sends (call check_sends()). - */ -int -mxlnd_timeoutd(void *arg) -{ - int i = 0; - long id = (long) arg; - unsigned long now = 0; - unsigned long next = 0; - unsigned long delay = HZ; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - cfs_daemonize("mxlnd_timeoutd"); - //cfs_block_allsigs(); - - CDEBUG(D_NET, "timeoutd starting\n"); - - while (!kmxlnd_data.kmx_shutdown) { - - now = jiffies; - /* if the next timeout has not arrived, go back to sleep */ - if (time_after(now, next)) { - next = mxlnd_check_timeouts(now); - } - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - spin_lock(&peer->mxp_lock); - conn = peer->mxp_conn; - if (conn) mxlnd_conn_addref(conn); /* take ref... 
*/ - spin_unlock(&peer->mxp_lock); - - if (conn == NULL) - continue; - - if (conn->mxk_status != MXLND_CONN_DISCONNECT && - time_after(now, conn->mxk_last_tx + HZ)) { - mxlnd_check_sends(peer); - } - mxlnd_conn_decref(conn); /* until here */ - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - mxlnd_sleep(delay); - } - CDEBUG(D_NET, "timeoutd stopping\n"); - mxlnd_thread_stop(id); - return 0; -} diff --git a/lnet/klnds/mxlnd/mxlnd_modparams.c b/lnet/klnds/mxlnd/mxlnd_modparams.c deleted file mode 100644 index d36a1885fcecd51bbbb97e793a2b3a9949ebd1ad..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd_modparams.c +++ /dev/null @@ -1,72 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "mxlnd.h" - -static int n_waitd = MXLND_N_SCHED; -CFS_MODULE_PARM(n_waitd, "i", int, 0444, - "# of completion daemons"); - -static int max_peers = MXLND_MAX_PEERS; -CFS_MODULE_PARM(max_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = MXLND_CKSUM; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not data payload) checksums"); - -static int ntx = MXLND_NTX; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of total tx message descriptors"); - -static int credits = MXLND_MSG_QUEUE_DEPTH; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int board = MXLND_MX_BOARD; -CFS_MODULE_PARM(board, "i", int, 0444, - "index value of the Myrinet board (NIC)"); - -static int ep_id = MXLND_MX_EP_ID; -CFS_MODULE_PARM(ep_id, "i", int, 0444, "MX endpoint ID"); - -static int polling = MXLND_POLLING; -CFS_MODULE_PARM(polling, "i", int, 0444, - "Use 0 to block (wait). A value > 0 will poll that many times before blocking"); - -static char *hosts = NULL; -CFS_MODULE_PARM(hosts, "s", charp, 0444, - "IP-to-hostname resolution file"); - -kmx_tunables_t kmxlnd_tunables = { - .kmx_n_waitd = &n_waitd, - .kmx_max_peers = &max_peers, - .kmx_cksum = &cksum, - .kmx_ntx = &ntx, - .kmx_credits = &credits, - .kmx_board = &board, - .kmx_ep_id = &ep_id, - .kmx_polling = &polling, - .kmx_hosts = &hosts -}; diff --git a/lnet/klnds/mxlnd/mxlnd_wire.h b/lnet/klnds/mxlnd/mxlnd_wire.h deleted file mode 100644 index a929608409e3fc57b18caca380fdb19fb0af5f21..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd_wire.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. 
- * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * MXLND wire format - sent in sender's byte order - */ - -typedef struct kmx_connreq_msg -{ - u32 mxcrm_queue_depth; /* per peer max messages in flight */ - u32 mxcrm_eager_size; /* size of preposted eager messages */ -} WIRE_ATTR kmx_connreq_msg_t; - -typedef struct kmx_eager_msg -{ - lnet_hdr_t mxem_hdr; /* lnet header */ - char mxem_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kmx_eager_msg_t; - -typedef struct kmx_putreq_msg -{ - lnet_hdr_t mxprm_hdr; /* lnet header */ - u64 mxprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kmx_putreq_msg_t; - -typedef struct kmx_putack_msg -{ - u64 mxpam_src_cookie; /* reflected completion cookie */ - u64 mxpam_dst_cookie; /* opaque completion cookie */ -} WIRE_ATTR kmx_putack_msg_t; - -typedef struct kmx_getreq_msg -{ - lnet_hdr_t mxgrm_hdr; /* lnet header */ - u64 mxgrm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kmx_getreq_msg_t; - -typedef struct kmx_msg -{ - /* First two fields fixed for all time */ - u32 mxm_magic; /* MXLND message */ - u16 mxm_version; /* version number */ - - u8 mxm_type; /* message type */ - u8 mxm_credits; /* returned credits */ - u32 mxm_nob; /* # of bytes in whole message */ - u32 mxm_cksum; /* checksum (0 == no checksum) */ - u64 
mxm_srcnid; /* sender's NID */ - u64 mxm_srcstamp; /* sender's incarnation */ - u64 mxm_dstnid; /* destination's NID */ - u64 mxm_dststamp; /* destination's incarnation */ - u64 mxm_seq; /* sequence number */ - - union { - kmx_connreq_msg_t conn_req; - kmx_eager_msg_t eager; - kmx_putreq_msg_t put_req; - kmx_putack_msg_t put_ack; - kmx_getreq_msg_t get_req; - } WIRE_ATTR mxm_u; -} WIRE_ATTR kmx_msg_t; - -#define MXLND_MSG_MAGIC 0x4d583130 /* unique magic 'MX10' */ -#define MXLND_MSG_VERSION 0x01 - -#define MXLND_MSG_CONN_REQ 0xc /* connection request */ -#define MXLND_MSG_CONN_ACK 0xa /* connection request response */ -#define MXLND_MSG_EAGER 0xe /* eager message */ -#define MXLND_MSG_NOOP 0x1 /* no msg, return credits */ -#define MXLND_MSG_PUT_REQ 0x2 /* put request src->sink */ -#define MXLND_MSG_PUT_ACK 0x3 /* put ack src<-sink */ -#define MXLND_MSG_PUT_DATA 0x4 /* put payload src->sink */ -#define MXLND_MSG_GET_REQ 0x5 /* get request sink->src */ -#define MXLND_MSG_GET_DATA 0x6 /* get payload sink<-src */ diff --git a/lnet/klnds/o2iblnd/.cvsignore b/lnet/klnds/o2iblnd/.cvsignore deleted file mode 100644 index 2e9b6f47052e4a9724b08b6336229b01d72676a4..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend -wirecheck diff --git a/lnet/klnds/o2iblnd/Makefile.in b/lnet/klnds/o2iblnd/Makefile.in deleted file mode 100644 index 569c266a2d68c1e9591839548c35407b69adac03..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/Makefile.in +++ /dev/null @@ -1,8 +0,0 @@ -MODULES := ko2iblnd -ko2iblnd-objs := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o - -# Need to make sure we use PRE, not POST here so that an external OFED -# source pool overrides any in-kernel OFED sources -EXTRA_PRE_CFLAGS := @O2IBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/o2iblnd/autoMakefile.am 
b/lnet/klnds/o2iblnd/autoMakefile.am deleted file mode 100644 index 83788fd2ee37a895a37f4d26fc9d988b5bfe8b02..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_O2IBLND -modulenet_DATA = ko2iblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(ko2iblnd-objs:%.o=%.c) o2iblnd.h diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c deleted file mode 100644 index 5d4001022d05e3d8df38debe0e18568e73c483f5..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ /dev/null @@ -1,1722 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "o2iblnd.h" - -lnd_t the_kiblnd = { - .lnd_type = O2IBLND, - .lnd_startup = kiblnd_startup, - .lnd_shutdown = kiblnd_shutdown, - .lnd_ctl = kiblnd_ctl, - .lnd_send = kiblnd_send, - .lnd_recv = kiblnd_recv, -}; - -kib_data_t kiblnd_data; - -__u32 -kiblnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 1 : sum; -} - -void -kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, - int credits, lnet_nid_t dstnid, __u64 dststamp) -{ - kib_net_t *net = ni->ni_data; - - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBLND_MSG_MAGIC; - msg->ibm_version = IBLND_MSG_VERSION; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, dstnid); - msg->ibm_srcstamp = net->ibn_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - - if (*kiblnd_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob); - } -} - -int -kiblnd_unpack_msg(kib_msg_t *msg, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - int flip; - int msg_nob; -#if !IBLND_MAP_ON_DEMAND - int i; - int n; -#endif - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - if (msg->ibm_magic == IBLND_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - if (msg->ibm_version != - (flip ? 
__swab16(IBLND_MSG_VERSION) : IBLND_MSG_VERSION)) { - CERROR("Bad version: %d\n", msg->ibm_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kiblnd_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - __swab16s(&msg->ibm_version); - CLASSERT (sizeof(msg->ibm_type) == 1); - CLASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBLND_MSG_NOOP: - break; - - case IBLND_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBLND_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) { - CERROR("Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putreq))); - return -EPROTO; - } - break; - - case IBLND_MSG_PUT_ACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putack))); - return -EPROTO; - } -#if 
IBLND_MAP_ON_DEMAND - if (flip) { - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrags); - } - - n = msg->ibm_u.putack.ibpam_rd.rd_nfrags; - if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) { - CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", - n, IBLND_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) { - for (i = 0; i < n; i++) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr); - } - } -#endif - break; - - case IBLND_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.get))); - return -EPROTO; - } -#if IBLND_MAP_ON_DEMAND - if (flip) { - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrags); - } - - n = msg->ibm_u.get.ibgm_rd.rd_nfrags; - if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) { - CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", - n, IBLND_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) - for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrags; i++) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr); - } -#endif - 
break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - - case IBLND_MSG_CONNREQ: - case IBLND_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short connreq/ack: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) { - __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab16s(&msg->ibm_u.connparams.ibcp_max_frags); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - } - break; - } - return 0; -} - -int -kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_net_t *net = ni->ni_data; - unsigned long flags; - - LASSERT (net != NULL); - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, sizeof(*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_ni = ni; - peer->ibp_nid = nid; - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD(&peer->ibp_conns); - INIT_LIST_HEAD(&peer->ibp_tx_queue); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT (net->ibn_shutdown == 0); - - /* npeers only grows with the global lock held */ - atomic_inc(&net->ibn_npeers); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - *peerp = peer; - return 0; -} - -void -kiblnd_destroy_peer (kib_peer_t *peer) -{ - kib_net_t *net = peer->ibp_ni->ni_data; - - LASSERT (net != NULL); - LASSERT 
(atomic_read(&peer->ibp_refcount) == 0); - LASSERT (!kiblnd_peer_active(peer)); - LASSERT (peer->ibp_connecting == 0); - LASSERT (peer->ibp_accepting == 0); - LASSERT (list_empty(&peer->ibp_conns)); - LASSERT (list_empty(&peer->ibp_tx_queue)); - - LIBCFS_FREE(peer, sizeof(*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. */ - atomic_dec(&net->ibn_npeers); -} - -void -kiblnd_destroy_dev (kib_dev_t *dev) -{ - LASSERT (dev->ibd_nnets == 0); - - if (!list_empty(&dev->ibd_list)) /* on kib_devs? */ - list_del_init(&dev->ibd_list); - - if (dev->ibd_mr != NULL) - ib_dereg_mr(dev->ibd_mr); - - if (dev->ibd_pd != NULL) - ib_dealloc_pd(dev->ibd_pd); - - if (dev->ibd_cmid != NULL) - rdma_destroy_id(dev->ibd_cmid); - - LIBCFS_FREE(dev, sizeof(*dev)); -} - -kib_peer_t * -kiblnd_find_peer_locked (lnet_nid_t nid) -{ - /* the caller is responsible for accounting the additional reference - * that this creates */ - struct list_head *peer_list = kiblnd_nid2peerlist(nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry(tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_connecting > 0 || /* creating conns */ - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); /* active conn */ - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), - atomic_read(&peer->ibp_refcount)); - return peer; - } - return NULL; -} - -void -kiblnd_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kiblnd_peer_active(peer)); - list_del_init(&peer->ibp_list); - /* lose peerlist's ref */ - kiblnd_peer_decref(peer); -} - -int -kiblnd_get_peer_info (lnet_ni_t *ni, int index, - lnet_nid_t *nidp, int *count) -{ - kib_peer_t *peer; - struct list_head *ptmp; - int i; - unsigned long 
flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kiblnd_data.kib_peers[i]) { - - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *count = atomic_read(&peer->ibp_refcount); - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return 0; - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return -ENOENT; -} - -void -kiblnd_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - if (list_empty(&peer->ibp_conns)) { - kiblnd_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kiblnd_close_conn_locked(conn, 0); - } - /* NB closing peer's last conn unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. 
*/ -} - -int -kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid) -{ - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) { - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - } else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kiblnd_del_peer_locked(peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_txlist_done(ni, &zombies, -EIO); - - return rc; -} - -kib_conn_t * -kiblnd_get_conn_by_idx (lnet_ni_t *ni, int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &kiblnd_data.kib_peers[i]) { - - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, kib_conn_t, ibc_list); - kiblnd_conn_addref(conn); - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return conn; - } - } - } - - 
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return NULL; -} - -void -kiblnd_debug_rx (kib_rx_t *rx) -{ - CDEBUG(D_CONSOLE, " %p status %d msg_type %x cred %d\n", - rx, rx->rx_status, rx->rx_msg->ibm_type, - rx->rx_msg->ibm_credits); -} - -void -kiblnd_debug_tx (kib_tx_t *tx) -{ - CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lx " - "cookie "LPX64" msg %s%s type %x cred %d\n", - tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting, - tx->tx_status, tx->tx_deadline, tx->tx_cookie, - tx->tx_lntmsg[0] == NULL ? "-" : "!", - tx->tx_lntmsg[1] == NULL ? "-" : "!", - tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits); -} - -void -kiblnd_debug_conn (kib_conn_t *conn) -{ - struct list_head *tmp; - int i; - - spin_lock(&conn->ibc_lock); - - CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n", - atomic_read(&conn->ibc_refcount), conn, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - CDEBUG(D_CONSOLE, " state %d nposted %d cred %d o_cred %d r_cred %d\n", - conn->ibc_state, conn->ibc_nsends_posted, conn->ibc_credits, - conn->ibc_outstanding_credits, conn->ibc_reserved_credits); - CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error); - - CDEBUG(D_CONSOLE, " early_rxs:\n"); - list_for_each(tmp, &conn->ibc_early_rxs) - kiblnd_debug_rx(list_entry(tmp, kib_rx_t, rx_list)); - - CDEBUG(D_CONSOLE, " tx_noops:\n"); - list_for_each(tmp, &conn->ibc_tx_noops) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_nocred:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_nocred) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_rsrvd) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue:\n"); - list_for_each(tmp, &conn->ibc_tx_queue) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " active_txs:\n"); - list_for_each(tmp, &conn->ibc_active_txs) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - 
CDEBUG(D_CONSOLE, " rxs:\n"); - for (i = 0; i < IBLND_RX_MSGS; i++) - kiblnd_debug_rx(&conn->ibc_rxs[i]); - - spin_unlock(&conn->ibc_lock); -} - -kib_conn_t * -kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid, int state) -{ - /* CAVEAT EMPTOR: - * If the new conn is created successfully it takes over the caller's - * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself - * is destroyed. On failure, the caller's ref on 'peer' remains and - * she must dispose of 'cmid'. (Actually I'd block forever if I tried - * to destroy 'cmid' here since I'm called from the CM which still has - * its ref on 'cmid'). */ - kib_conn_t *conn; - kib_net_t *net = peer->ibp_ni->ni_data; - int i; - int page_offset; - int ipage; - int rc; - struct ib_cq *cq; - struct ib_qp_init_attr *init_qp_attr; - unsigned long flags; - - LASSERT (net != NULL); - LASSERT (!in_interrupt()); - - LIBCFS_ALLOC(init_qp_attr, sizeof(*init_qp_attr)); - if (init_qp_attr == NULL) { - CERROR("Can't allocate qp_attr for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_0; - } - - LIBCFS_ALLOC(conn, sizeof(*conn)); - if (conn == NULL) { - CERROR("Can't allocate connection for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_1; - } - - memset(conn, 0, sizeof(*conn)); /* zero flags, NULL pointers etc... 
*/ - - conn->ibc_state = IBLND_CONN_INIT; - conn->ibc_peer = peer; /* I take the caller's ref */ - cmid->context = conn; /* for future CM callbacks */ - conn->ibc_cmid = cmid; - - INIT_LIST_HEAD(&conn->ibc_early_rxs); - INIT_LIST_HEAD(&conn->ibc_tx_noops); - INIT_LIST_HEAD(&conn->ibc_tx_queue); - INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD(&conn->ibc_active_txs); - spin_lock_init(&conn->ibc_lock); - - LIBCFS_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - if (conn->ibc_connvars == NULL) { - CERROR("Can't allocate in-progress connection state\n"); - goto failed_2; - } - memset(conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars)); - - LIBCFS_ALLOC(conn->ibc_rxs, IBLND_RX_MSGS * sizeof(kib_rx_t)); - if (conn->ibc_rxs == NULL) { - CERROR("Cannot allocate RX buffers\n"); - goto failed_2; - } - memset(conn->ibc_rxs, 0, IBLND_RX_MSGS * sizeof(kib_rx_t)); - - rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, IBLND_RX_MSG_PAGES); - if (rc != 0) - goto failed_2; - - for (i = ipage = page_offset = 0; i < IBLND_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - rx->rx_msgaddr = kiblnd_dma_map_single(cmid->device, - rx->rx_msg, IBLND_MSG_SIZE, - DMA_FROM_DEVICE); - KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr); - - CDEBUG(D_NET,"rx %d: %p "LPX64"("LPX64")\n", - i, rx->rx_msg, rx->rx_msgaddr, - lnet_page2phys(page) + page_offset); - - page_offset += IBLND_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBLND_RX_MSG_PAGES); - } - } - -#if (IBLND_OFED_VERSION == 1025) - cq = ib_create_cq(cmid->device, - kiblnd_cq_completion, kiblnd_cq_event, conn, - IBLND_CQ_ENTRIES(), 0); -#else - cq = ib_create_cq(cmid->device, - kiblnd_cq_completion, kiblnd_cq_event, conn, - 
IBLND_CQ_ENTRIES()); -#endif - if (!IS_ERR(cq)) { - conn->ibc_cq = cq; - } else { - CERROR("Can't create CQ: %ld\n", PTR_ERR(cq)); - goto failed_2; - } - - rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (rc != 0) { - CERROR("Can't request completion notificiation: %d\n", rc); - goto failed_2; - } - - memset(init_qp_attr, 0, sizeof(*init_qp_attr)); - init_qp_attr->event_handler = kiblnd_qp_event; - init_qp_attr->qp_context = conn; - init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS; - init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS; - init_qp_attr->cap.max_send_sge = 1; - init_qp_attr->cap.max_recv_sge = 1; - init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - init_qp_attr->qp_type = IB_QPT_RC; - init_qp_attr->send_cq = cq; - init_qp_attr->recv_cq = cq; - - rc = 0; - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - switch (*kiblnd_tunables.kib_ib_mtu) { - default: - rc = *kiblnd_tunables.kib_ib_mtu; - /* fall through to... */ - case 0: /* set tunable to the default - * CAVEAT EMPTOR! 
this assumes the default is one of the MTUs - * below, otherwise we'll WARN on the next QP create */ - *kiblnd_tunables.kib_ib_mtu = - ib_mtu_enum_to_int(cmid->route.path_rec->mtu); - break; - case 256: - cmid->route.path_rec->mtu = IB_MTU_256; - break; - case 512: - cmid->route.path_rec->mtu = IB_MTU_512; - break; - case 1024: - cmid->route.path_rec->mtu = IB_MTU_1024; - break; - case 2048: - cmid->route.path_rec->mtu = IB_MTU_2048; - break; - case 4096: - cmid->route.path_rec->mtu = IB_MTU_4096; - break; - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (rc != 0) - CWARN("Invalid IB MTU value %d, using default value %d\n", - rc, *kiblnd_tunables.kib_ib_mtu); - - rc = rdma_create_qp(cmid, net->ibn_dev->ibd_pd, init_qp_attr); - if (rc != 0) { - CERROR("Can't create QP: %d\n", rc); - goto failed_2; - } - - LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); - - /* 1 ref for caller and each rxmsg */ - atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS); - conn->ibc_nrx = IBLND_RX_MSGS; - - /* post receives */ - for (i = 0; i < IBLND_RX_MSGS; i++) { - rc = kiblnd_post_rx(&conn->ibc_rxs[i], - IBLND_POSTRX_NO_CREDIT); - if (rc != 0) { - CERROR("Can't post rxmsg: %d\n", rc); - - /* Make posted receives complete */ - kiblnd_abort_receives(conn); - - /* correct # of posted buffers - * NB locking needed now I'm racing with completion */ - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - conn->ibc_nrx -= IBLND_RX_MSGS - i; - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - /* Drop my own and unused rxbuffer refcounts */ - while (i++ <= IBLND_RX_MSGS) - kiblnd_conn_decref(conn); - - return NULL; - } - } - - /* Init successful! 
*/ - LASSERT (state == IBLND_CONN_ACTIVE_CONNECT || - state == IBLND_CONN_PASSIVE_WAIT); - conn->ibc_state = state; - - /* 1 more conn */ - atomic_inc(&net->ibn_nconns); - return conn; - - failed_2: - kiblnd_destroy_conn(conn); - failed_1: - LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); - failed_0: - return NULL; -} - -void -kiblnd_destroy_conn (kib_conn_t *conn) -{ - struct rdma_cm_id *cmid = conn->ibc_cmid; - kib_peer_t *peer = conn->ibc_peer; - int rc; - int i; - - LASSERT (!in_interrupt()); - LASSERT (atomic_read(&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_early_rxs)); - LASSERT (list_empty(&conn->ibc_tx_noops)); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT (list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - - switch (conn->ibc_state) { - default: - /* conn must be completely disengaged from the network */ - LBUG(); - - case IBLND_CONN_DISCONNECTED: - /* connvars should have been freed already */ - LASSERT (conn->ibc_connvars == NULL); - break; - - case IBLND_CONN_INIT: - break; - } - - if (conn->ibc_cmid->qp != NULL) - rdma_destroy_qp(conn->ibc_cmid); - - if (conn->ibc_cq != NULL) { - rc = ib_destroy_cq(conn->ibc_cq); - if (rc != 0) - CWARN("Error destroying CQ: %d\n", rc); - } - - if (conn->ibc_rx_pages != NULL) { - LASSERT (conn->ibc_rxs != NULL); - - for (i = 0; i < IBLND_RX_MSGS; i++) { - kib_rx_t *rx = &conn->ibc_rxs[i]; - - LASSERT (rx->rx_nob >= 0); /* not posted */ - - kiblnd_dma_unmap_single(conn->ibc_cmid->device, - KIBLND_UNMAP_ADDR(rx, rx_msgunmap, - rx->rx_msgaddr), - IBLND_MSG_SIZE, DMA_FROM_DEVICE); - } - - kiblnd_free_pages(conn->ibc_rx_pages); - } - - if (conn->ibc_rxs != NULL) { - LIBCFS_FREE(conn->ibc_rxs, - IBLND_RX_MSGS * sizeof(kib_rx_t)); - } - - if (conn->ibc_connvars != NULL) - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - - /* See CAVEAT EMPTOR above in 
kiblnd_create_conn */ - if (conn->ibc_state != IBLND_CONN_INIT) { - kib_net_t *net = peer->ibp_ni->ni_data; - - kiblnd_peer_decref(peer); - rdma_destroy_id(cmid); - atomic_dec(&net->ibn_nconns); - } - - LIBCFS_FREE(conn, sizeof(*conn)); -} - -int -kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - count++; - kiblnd_close_conn_locked(conn, why); - } - - return count; -} - -int -kiblnd_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kiblnd_close_conn_locked(conn, -ESTALE); - } - - return count; -} - -int -kiblnd_close_matching_conns (lnet_ni_t *ni, lnet_nid_t nid) -{ - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - unsigned long flags; - int count = 0; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kiblnd_close_peer_conns_locked(peer, 0); - } - } - - 
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return 0; - - return (count == 0) ? -ENOENT : 0; -} - -int -kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int count = 0; - - rc = kiblnd_get_peer_info(ni, data->ioc_count, - &nid, &count); - data->ioc_nid = nid; - data->ioc_count = count; - break; - } - - case IOC_LIBCFS_DEL_PEER: { - rc = kiblnd_del_peer(ni, data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kiblnd_get_conn_by_idx(ni, data->ioc_count); - - if (conn == NULL) { - rc = -ENOENT; - } else { - // kiblnd_debug_conn(conn); - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kiblnd_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kiblnd_close_matching_conns(ni, data->ioc_nid); - break; - } - - default: - break; - } - - return rc; -} - -void -kiblnd_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kiblnd_alloc_pages (kib_pages_t **pp, int npages) -{ - kib_pages_t *p; - int i; - - LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { - CERROR("Can't allocate descriptor for %d pages\n", npages); - return -ENOMEM; - } - - memset(p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_page(GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR("Can't allocate page %d of %d\n", i, npages); - kiblnd_free_pages(p); - return -ENOMEM; - } - } - - *pp = p; - return 0; -} - -void -kiblnd_free_tx_descs (lnet_ni_t *ni) -{ - int i; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - if 
(net->ibn_tx_descs != NULL) { - for (i = 0; i < IBLND_TX_MSGS(); i++) { - kib_tx_t *tx = &net->ibn_tx_descs[i]; - -#if IBLND_MAP_ON_DEMAND - if (tx->tx_pages != NULL) - LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); -#else - if (tx->tx_wrq != NULL) - LIBCFS_FREE(tx->tx_wrq, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - - if (tx->tx_sge != NULL) - LIBCFS_FREE(tx->tx_sge, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_sge)); - - if (tx->tx_rd != NULL) - LIBCFS_FREE(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBLND_MAX_RDMA_FRAGS])); - - if (tx->tx_frags != NULL) - LIBCFS_FREE(tx->tx_frags, - IBLND_MAX_RDMA_FRAGS * - sizeof(*tx->tx_frags)); -#endif - } - - LIBCFS_FREE(net->ibn_tx_descs, - IBLND_TX_MSGS() * sizeof(kib_tx_t)); - } - - if (net->ibn_tx_pages != NULL) - kiblnd_free_pages(net->ibn_tx_pages); -} - -int -kiblnd_alloc_tx_descs (lnet_ni_t *ni) -{ - int i; - int rc; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - rc = kiblnd_alloc_pages(&net->ibn_tx_pages, IBLND_TX_MSG_PAGES()); - - if (rc != 0) { - CERROR("Can't allocate tx pages\n"); - return rc; - } - - LIBCFS_ALLOC (net->ibn_tx_descs, - IBLND_TX_MSGS() * sizeof(kib_tx_t)); - if (net->ibn_tx_descs == NULL) { - CERROR("Can't allocate %d tx descriptors\n", IBLND_TX_MSGS()); - return -ENOMEM; - } - - memset(net->ibn_tx_descs, 0, - IBLND_TX_MSGS() * sizeof(kib_tx_t)); - - for (i = 0; i < IBLND_TX_MSGS(); i++) { - kib_tx_t *tx = &net->ibn_tx_descs[i]; - -#if IBLND_MAP_ON_DEMAND - LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); - if (tx->tx_pages == NULL) { - CERROR("Can't allocate phys page vector[%d]\n", - LNET_MAX_IOV); - return -ENOMEM; - } -#else - LIBCFS_ALLOC(tx->tx_wrq, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - if (tx->tx_wrq == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_sge, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_sge)); - if (tx->tx_sge == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_rd, - 
offsetof(kib_rdma_desc_t, - rd_frags[IBLND_MAX_RDMA_FRAGS])); - if (tx->tx_rd == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_frags, - IBLND_MAX_RDMA_FRAGS * - sizeof(*tx->tx_frags)); - if (tx->tx_frags == NULL) - return -ENOMEM; -#endif - } - - return 0; -} - -void -kiblnd_unmap_tx_descs (lnet_ni_t *ni) -{ - int i; - kib_tx_t *tx; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - for (i = 0; i < IBLND_TX_MSGS(); i++) { - tx = &net->ibn_tx_descs[i]; - - kiblnd_dma_unmap_single(net->ibn_dev->ibd_cmid->device, - KIBLND_UNMAP_ADDR(tx, tx_msgunmap, - tx->tx_msgaddr), - IBLND_MSG_SIZE, DMA_TO_DEVICE); - } -} - -void -kiblnd_map_tx_descs (lnet_ni_t *ni) -{ - int ipage = 0; - int page_offset = 0; - int i; - struct page *page; - kib_tx_t *tx; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - /* pre-mapped messages are not bigger than 1 page */ - CLASSERT (IBLND_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - CLASSERT (PAGE_SIZE % IBLND_MSG_SIZE == 0); - - for (i = 0; i < IBLND_TX_MSGS(); i++) { - page = net->ibn_tx_pages->ibp_pages[ipage]; - tx = &net->ibn_tx_descs[i]; - - tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - tx->tx_msgaddr = kiblnd_dma_map_single( - net->ibn_dev->ibd_cmid->device, - tx->tx_msg, IBLND_MSG_SIZE, DMA_TO_DEVICE); - KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr); - - list_add(&tx->tx_list, &net->ibn_idle_txs); - - page_offset += IBLND_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBLND_TX_MSG_PAGES()); - } - } -} - -void -kiblnd_base_shutdown (void) -{ - int i; - - LASSERT (list_empty(&kiblnd_data.kib_devs)); - - CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - switch (kiblnd_data.kib_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - case IBLND_INIT_DATA: - LASSERT (kiblnd_data.kib_peers != NULL); - for (i 
= 0; i < kiblnd_data.kib_peer_hash_size; i++) { - LASSERT (list_empty(&kiblnd_data.kib_peers[i])); - } - LASSERT (list_empty(&kiblnd_data.kib_connd_zombies)); - LASSERT (list_empty(&kiblnd_data.kib_connd_conns)); - - /* flag threads to terminate; wake and wait for them to die */ - kiblnd_data.kib_shutdown = 1; - wake_up_all(&kiblnd_data.kib_sched_waitq); - wake_up_all(&kiblnd_data.kib_connd_waitq); - - i = 2; - while (atomic_read(&kiblnd_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d threads to terminate\n", - atomic_read(&kiblnd_data.kib_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - /* fall through */ - - case IBLND_INIT_NOTHING: - break; - } - - if (kiblnd_data.kib_peers != NULL) - LIBCFS_FREE(kiblnd_data.kib_peers, - sizeof(struct list_head) * - kiblnd_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - kiblnd_data.kib_init = IBLND_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -void -kiblnd_shutdown (lnet_ni_t *ni) -{ - kib_net_t *net = ni->ni_data; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - int i; - unsigned long flags; - - LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); - - if (net == NULL) - goto out; - - CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - write_lock_irqsave(g_lock, flags); - net->ibn_shutdown = 1; - write_unlock_irqrestore(g_lock, flags); - - switch (net->ibn_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - /* nuke all existing peers within this net */ - kiblnd_del_peer(ni, LNET_NID_ANY); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&net->ibn_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? 
*/ - "%s: waiting for %d peers to disconnect\n", - libcfs_nid2str(ni->ni_nid), - atomic_read(&net->ibn_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - - kiblnd_unmap_tx_descs(ni); - - LASSERT (net->ibn_dev->ibd_nnets > 0); - net->ibn_dev->ibd_nnets--; - - /* fall through */ - - case IBLND_INIT_NOTHING: - LASSERT (atomic_read(&net->ibn_nconns) == 0); - -#if IBLND_MAP_ON_DEMAND - if (net->ibn_fmrpool != NULL) - ib_destroy_fmr_pool(net->ibn_fmrpool); -#endif - if (net->ibn_dev != NULL && - net->ibn_dev->ibd_nnets == 0) - kiblnd_destroy_dev(net->ibn_dev); - - break; - } - - kiblnd_free_tx_descs(ni); - - CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - net->ibn_init = IBLND_INIT_NOTHING; - ni->ni_data = NULL; - - LIBCFS_FREE(net, sizeof(*net)); - -out: - if (list_empty(&kiblnd_data.kib_devs)) - kiblnd_base_shutdown(); - return; -} - -int -kiblnd_base_startup (void) -{ - int rc; - int i; - - LASSERT (kiblnd_data.kib_init == IBLND_INIT_NOTHING); - - if (*kiblnd_tunables.kib_credits > *kiblnd_tunables.kib_ntx) { - CERROR("Can't set credits(%d) > ntx(%d)\n", - *kiblnd_tunables.kib_credits, - *kiblnd_tunables.kib_ntx); - return -EINVAL; - } - - PORTAL_MODULE_USE; - memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */ - - rwlock_init(&kiblnd_data.kib_global_lock); - - INIT_LIST_HEAD(&kiblnd_data.kib_devs); - - kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; - LIBCFS_ALLOC(kiblnd_data.kib_peers, - sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size); - if (kiblnd_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); - - spin_lock_init(&kiblnd_data.kib_connd_lock); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); - init_waitqueue_head(&kiblnd_data.kib_connd_waitq); - - spin_lock_init(&kiblnd_data.kib_sched_lock); - INIT_LIST_HEAD(&kiblnd_data.kib_sched_conns); - 
init_waitqueue_head(&kiblnd_data.kib_sched_waitq); - - kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; - - /* lists/ptrs/locks initialised */ - kiblnd_data.kib_init = IBLND_INIT_DATA; - /*****************************************************/ - - for (i = 0; i < IBLND_N_SCHED; i++) { - rc = kiblnd_thread_start(kiblnd_scheduler, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn o2iblnd scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kiblnd_thread_start(kiblnd_connd, NULL); - if (rc != 0) { - CERROR("Can't spawn o2iblnd connd: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kiblnd_data.kib_init = IBLND_INIT_ALL; - /*****************************************************/ - - return 0; - - failed: - kiblnd_base_shutdown(); - return -ENETDOWN; -} - -int -kiblnd_startup (lnet_ni_t *ni) -{ - char *ifname; - kib_net_t *net; - kib_dev_t *ibdev; - struct list_head *tmp; - struct timeval tv; - int rc; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { - rc = kiblnd_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - ni->ni_data = net; - if (net == NULL) - goto failed; - - memset(net, 0, sizeof(*net)); - - do_gettimeofday(&tv); - net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits; - ni->ni_peertxcredits = *kiblnd_tunables.kib_peercredits; - - spin_lock_init(&net->ibn_tx_lock); - INIT_LIST_HEAD(&net->ibn_idle_txs); - - rc = kiblnd_alloc_tx_descs(ni); - if (rc != 0) { - CERROR("Can't allocate tx descs\n"); - goto failed; - } - - if (ni->ni_interfaces[0] != NULL) { - /* Use the IPoIB interface specified in 'networks=' */ - - CLASSERT (LNET_MAX_INTERFACES > 1); - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - goto failed; - } - - ifname = ni->ni_interfaces[0]; - } else { - ifname = *kiblnd_tunables.kib_default_ipif; - } - - if (strlen(ifname) >= 
sizeof(ibdev->ibd_ifname)) { - CERROR("IPoIB interface name too long: %s\n", ifname); - goto failed; - } - - ibdev = NULL; - list_for_each (tmp, &kiblnd_data.kib_devs) { - ibdev = list_entry(tmp, kib_dev_t, ibd_list); - - if (!strcmp(&ibdev->ibd_ifname[0], ifname)) - break; - - ibdev = NULL; - } - - if (ibdev == NULL) { - __u32 ip; - __u32 netmask; - int up; - struct rdma_cm_id *id; - struct ib_pd *pd; - struct ib_mr *mr; - struct sockaddr_in addr; - - rc = libcfs_ipif_query(ifname, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", - ifname, rc); - goto failed; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", - ifname); - goto failed; - } - - LIBCFS_ALLOC(ibdev, sizeof(*ibdev)); - if (ibdev == NULL) - goto failed; - - memset(ibdev, 0, sizeof(*ibdev)); - - INIT_LIST_HEAD(&ibdev->ibd_list); /* not yet in kib_devs */ - ibdev->ibd_ifip = ip; - strcpy(&ibdev->ibd_ifname[0], ifname); - - id = rdma_create_id(kiblnd_cm_callback, ibdev, RDMA_PS_TCP); - if (!IS_ERR(id)) { - ibdev->ibd_cmid = id; - } else { - CERROR("Can't create listen ID: %ld\n", PTR_ERR(id)); - goto failed; - } - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(*kiblnd_tunables.kib_service); - addr.sin_addr.s_addr = htonl(ip); - - rc = rdma_bind_addr(id, (struct sockaddr *)&addr); - if (rc != 0) { - CERROR("Can't bind to %s: %d\n", ifname, rc); - goto failed; - } - - /* Binding should have assigned me an IB device */ - LASSERT (id->device != NULL); - - pd = ib_alloc_pd(id->device); - if (!IS_ERR(pd)) { - ibdev->ibd_pd = pd; - } else { - CERROR("Can't allocate PD: %ld\n", PTR_ERR(pd)); - goto failed; - } - -#if IBLND_MAP_ON_DEMAND - /* MR for sends and receives */ - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); -#else - /* MR for sends, recieves _and_ RDMA...........v */ - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); -#endif - if (!IS_ERR(mr)) { - ibdev->ibd_mr = mr; - } else { 
- CERROR("Can't get MR: %ld\n", PTR_ERR(mr)); - goto failed; - } - - rc = rdma_listen(id, 0); - if (rc != 0) { - CERROR("Can't start listener: %d\n", rc); - goto failed; - } - - list_add_tail(&ibdev->ibd_list, - &kiblnd_data.kib_devs); - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip); - net->ibn_dev = ibdev; - -#if IBLND_MAP_ON_DEMAND - /* FMR pool for RDMA */ - { - struct ib_fmr_pool *fmrpool; - struct ib_fmr_pool_param param = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE, - .page_shift = PAGE_SHIFT, - .access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE), - .pool_size = *kiblnd_tunables.kib_fmr_pool_size, - .dirty_watermark = *kiblnd_tunables.kib_fmr_flush_trigger, - .flush_function = NULL, - .flush_arg = NULL, - .cache = *kiblnd_tunables.kib_fmr_cache}; - - if (*kiblnd_tunables.kib_fmr_pool_size < - *kiblnd_tunables.kib_ntx) { - CERROR("Can't set fmr pool size (%d) < ntx(%d)\n", - *kiblnd_tunables.kib_fmr_pool_size, - *kiblnd_tunables.kib_ntx); - goto failed; - } - - fmrpool = ib_create_fmr_pool(ibdev->ibd_pd, ¶m); - if (!IS_ERR(fmrpool)) { - net->ibn_fmrpool = fmrpool; - } else { - CERROR("Can't create FMR pool: %ld\n", - PTR_ERR(fmrpool)); - goto failed; - } - } -#endif - - kiblnd_map_tx_descs(ni); - - ibdev->ibd_nnets++; - net->ibn_init = IBLND_INIT_ALL; - - return 0; - -failed: - kiblnd_shutdown(ni); - - CDEBUG(D_NET, "kiblnd_startup failed\n"); - return -ENETDOWN; -} - -void __exit -kiblnd_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kiblnd_tunables_fini(); -} - -int __init -kiblnd_module_init (void) -{ - int rc; - - CLASSERT (sizeof(kib_msg_t) <= IBLND_MSG_SIZE); -#if !IBLND_MAP_ON_DEMAND - CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - <= IBLND_MSG_SIZE); - CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - <= IBLND_MSG_SIZE); -#endif - rc = kiblnd_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - 
return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel OpenIB gen2 LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kiblnd_module_init); -module_exit(kiblnd_module_fini); diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c deleted file mode 100644 index 7881b499e059a0ec7585d419e7a5626141874ac8..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ /dev/null @@ -1,3196 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "o2iblnd.h" - -char * -kiblnd_msgtype2str(int type) -{ - switch (type) { - case IBLND_MSG_CONNREQ: - return "CONNREQ"; - - case IBLND_MSG_CONNACK: - return "CONNACK"; - - case IBLND_MSG_NOOP: - return "NOOP"; - - case IBLND_MSG_IMMEDIATE: - return "IMMEDIATE"; - - case IBLND_MSG_PUT_REQ: - return "PUT_REQ"; - - case IBLND_MSG_PUT_NAK: - return "PUT_NAK"; - - case IBLND_MSG_PUT_ACK: - return "PUT_ACK"; - - case IBLND_MSG_PUT_DONE: - return "PUT_DONE"; - - case IBLND_MSG_GET_REQ: - return "GET_REQ"; - - case IBLND_MSG_GET_DONE: - return "GET_DONE"; - - default: - return "???"; - } -} - -void -kiblnd_tx_done (lnet_ni_t *ni, kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - kib_net_t *net = ni->ni_data; - int rc; - int i; - - LASSERT (net != NULL); - LASSERT (!in_interrupt()); - LASSERT (!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */ - LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */ - -#if IBLND_MAP_ON_DEMAND - if (tx->tx_fmr != NULL) { - rc = ib_fmr_pool_unmap(tx->tx_fmr); - LASSERT (rc == 0); - - if (tx->tx_status != 0) { - rc = ib_flush_fmr_pool(net->ibn_fmrpool); - LASSERT (rc == 0); - } - - tx->tx_fmr = NULL; - } -#else - if (tx->tx_nfrags != 0) { - kiblnd_dma_unmap_sg(net->ibn_dev->ibd_cmid->device, - tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir); - tx->tx_nfrags = 0; - } -#endif - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - rc = tx->tx_status; - - if (tx->tx_conn != NULL) { - LASSERT (ni == tx->tx_conn->ibc_peer->ibp_ni); - - kiblnd_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - spin_lock(&net->ibn_tx_lock); - - list_add(&tx->tx_list, &net->ibn_idle_txs); - - spin_unlock(&net->ibn_tx_lock); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if 
(lntmsg[i] == NULL) - continue; - - lnet_finalize(ni, lntmsg[i], rc); - } -} - -void -kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kiblnd_tx_done(ni, tx); - } -} - -kib_tx_t * -kiblnd_get_idle_tx (lnet_ni_t *ni) -{ - kib_net_t *net = ni->ni_data; - kib_tx_t *tx; - - LASSERT (net != NULL); - - spin_lock(&net->ibn_tx_lock); - - if (list_empty(&net->ibn_idle_txs)) { - spin_unlock(&net->ibn_tx_lock); - return NULL; - } - - tx = list_entry(net->ibn_idle_txs.next, kib_tx_t, tx_list); - list_del(&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... */ - tx->tx_cookie = kiblnd_data.kib_next_tx_cookie++; - - spin_unlock(&net->ibn_tx_lock); - - LASSERT (tx->tx_nwrq == 0); - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending == 0); - LASSERT (!tx->tx_waiting); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); -#if IBLND_MAP_ON_DEMAND - LASSERT (tx->tx_fmr == NULL); -#else - LASSERT (tx->tx_nfrags == 0); -#endif - - return tx; -} - -void -kiblnd_drop_rx (kib_rx_t *rx) -{ - kib_conn_t *conn = rx->rx_conn; - unsigned long flags; - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - LASSERT (conn->ibc_nrx > 0); - conn->ibc_nrx--; - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); - - kiblnd_conn_decref(conn); -} - -int -kiblnd_post_rx (kib_rx_t *rx, int credit) -{ - kib_conn_t *conn = rx->rx_conn; - kib_net_t *net = conn->ibc_peer->ibp_ni->ni_data; - struct ib_recv_wr *bad_wrq; - int rc; - - LASSERT (net != NULL); - LASSERT (!in_interrupt()); - LASSERT (credit == IBLND_POSTRX_NO_CREDIT || - credit == IBLND_POSTRX_PEER_CREDIT || - credit == 
IBLND_POSTRX_RSRVD_CREDIT); - - rx->rx_sge.length = IBLND_MSG_SIZE; - rx->rx_sge.lkey = net->ibn_dev->ibd_mr->lkey; - rx->rx_sge.addr = rx->rx_msgaddr; - - rx->rx_wrq.next = NULL; - rx->rx_wrq.sg_list = &rx->rx_sge; - rx->rx_wrq.num_sge = 1; - rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX); - - LASSERT (conn->ibc_state >= IBLND_CONN_INIT); - LASSERT (rx->rx_nob >= 0); /* not posted */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) { - kiblnd_drop_rx(rx); /* No more posts for this rx */ - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - - rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq); - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */ - return rc; - - if (rc != 0) { - CERROR("Can't post rx for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_close_conn(conn, rc); - kiblnd_drop_rx(rx); /* No more posts for this rx */ - return rc; - } - - if (credit == IBLND_POSTRX_NO_CREDIT) - return 0; - - spin_lock(&conn->ibc_lock); - if (credit == IBLND_POSTRX_PEER_CREDIT) - conn->ibc_outstanding_credits++; - else - conn->ibc_reserved_credits++; - spin_unlock(&conn->ibc_lock); - - kiblnd_check_sends(conn); - return 0; -} - -kib_tx_t * -kiblnd_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending != 0 || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted %x)\n", - tx->tx_waiting ? 
"" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -void -kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) -{ - kib_tx_t *tx; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie); - if (tx == NULL) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie "LPX64" from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_close_conn(conn, -EPROTO); - return; - } - - if (tx->tx_status == 0) { /* success so far */ - if (status < 0) { /* failed? */ - tx->tx_status = status; - } else if (txtype == IBLND_MSG_GET_REQ) { - lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status); - } - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && (tx->tx_sending == 0); - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(ni, tx); -} - -void -kiblnd_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie) -{ - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - kib_tx_t *tx = kiblnd_get_idle_tx(ni); - - if (tx == NULL) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kiblnd_init_tx_msg(ni, tx, type, sizeof(kib_completion_msg_t)); - - kiblnd_queue_tx(tx, conn); -} - -void -kiblnd_handle_rx (kib_rx_t *rx) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - int credits = msg->ibm_credits; - kib_tx_t *tx; - int rc = 0; - int rc2; - int post_credit; - - LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - CDEBUG (D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - if (credits != 0) { - /* Have I received credits that will let me send? 
*/ - spin_lock(&conn->ibc_lock); - - if (conn->ibc_credits + credits > IBLND_MSG_QUEUE_SIZE) { - rc2 = conn->ibc_credits; - spin_unlock(&conn->ibc_lock); - - CERROR("Bad credits from %s: %d + %d > %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - rc2, credits, IBLND_MSG_QUEUE_SIZE); - - kiblnd_close_conn(conn, -EPROTO); - kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT); - return; - } - - conn->ibc_credits += credits; - - /* This ensures the credit taken by NOOP can be returned */ - if (msg->ibm_type == IBLND_MSG_NOOP) - conn->ibc_outstanding_credits++; - - spin_unlock(&conn->ibc_lock); - kiblnd_check_sends(conn); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBLND message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_NO_CREDIT; - rc = -EPROTO; - break; - - case IBLND_MSG_NOOP: - if (credits != 0) /* credit already posted */ - post_credit = IBLND_POSTRX_NO_CREDIT; - else /* a keepalive NOOP */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_IMMEDIATE: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_NAK: - CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBLND_MSG_PUT_ACK: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - - spin_lock(&conn->ibc_lock); - tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx != 
NULL) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (tx == NULL) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT (tx->tx_waiting); - /* CAVEAT EMPTOR: I could be racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. */ - - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kiblnd_init_rdma(ni, tx, IBLND_MSG_PUT_DONE, - kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kiblnd_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBLND_MSG_PUT_DONE: - post_credit = IBLND_POSTRX_PEER_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBLND_MSG_GET_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_GET_DONE: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kiblnd_close_conn(conn, rc); - - if (post_credit != IBLND_POSTRX_DONT_POST) - kiblnd_post_rx(rx, post_credit); -} - -void -kiblnd_rx_complete (kib_rx_t *rx, int status, int nob) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - kib_net_t *net = ni->ni_data; - unsigned long flags; - int rc; - int err = -EIO; - - LASSERT (net != 
NULL); - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) - goto ignore; - - if (status != IB_WC_SUCCESS) { - CDEBUG(D_NETERROR, "Rx from %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), status); - goto failed; - } - - LASSERT (nob >= 0); - rx->rx_nob = nob; - - rc = kiblnd_unpack_msg(msg, rx->rx_nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != ni->ni_nid || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != net->ibn_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - /* set time last known alive */ - kiblnd_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! */ - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return; - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - } - kiblnd_handle_rx(rx); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kiblnd_close_conn(conn, err); - ignore: - kiblnd_drop_rx(rx); /* Don't re-post rx. 
*/ -} - -struct page * -kiblnd_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) { - page = vmalloc_to_page ((void *)vaddr); - LASSERT (page != NULL); - return page; - } -#ifdef CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk (kiov) I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page (vaddr); - LASSERT (page != NULL); - return page; -} - -#if !IBLND_MAP_ON_DEMAND -int -kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - struct scatterlist *sg; - int i; - int fragnob; - unsigned long vaddr; - struct page *page; - int page_offset; - kib_net_t *net = ni->ni_data; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT (net != NULL); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT (niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kiblnd_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR ("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - sg->page = page; - sg->offset = page_offset; - sg->length = fragnob; - sg++; - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - /* If rd is not tx_rd, it's going to get sent to a peer and I'm the - * RDMA sink */ - tx->tx_nfrags = sg - tx->tx_frags; - tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - - rd->rd_nfrags = kiblnd_dma_map_sg(net->ibn_dev->ibd_cmid->device, - tx->tx_frags, tx->tx_nfrags, - tx->tx_dmadir); - rd->rd_key = (rd != tx->tx_rd) ? 
- net->ibn_dev->ibd_mr->rkey : net->ibn_dev->ibd_mr->lkey; - - for (i = 0; i < rd->rd_nfrags; i++) { - rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len( - net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]); - rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address( - net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]); - } - - return 0; -} - -int -kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - struct scatterlist *sg; - int i; - int fragnob; - kib_net_t *net = ni->ni_data; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (net != NULL); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT (nkiov > 0); - - fragnob = min((int)(kiov->kiov_len - offset), nob); - - memset(sg, 0, sizeof(*sg)); - sg->page = kiov->kiov_page; - sg->offset = kiov->kiov_offset + offset; - sg->length = fragnob; - sg++; - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - /* If rd is not tx_rd, it's going to get sent to a peer and I'm the - * RDMA sink */ - tx->tx_nfrags = sg - tx->tx_frags; - tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - - rd->rd_nfrags = kiblnd_dma_map_sg(net->ibn_dev->ibd_cmid->device, - tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir); - rd->rd_key = (rd != tx->tx_rd) ? 
- net->ibn_dev->ibd_mr->rkey : net->ibn_dev->ibd_mr->lkey; - - for (i = 0; i < tx->tx_nfrags; i++) { - rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len( - net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]); - rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address( - net->ibn_dev->ibd_cmid->device, &tx->tx_frags[i]); -#if 0 - CDEBUG(D_WARNING,"frag[%d]: "LPX64" for %d\n", - i, rd->rd_frags[i].rf_addr, rd->rd_frags[i].rf_nob); -#endif - } - - return 0; -} -#else -int -kiblnd_map_tx (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - int npages, unsigned long page_offset, int nob) -{ - struct ib_pool_fmr *fmr; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - LASSERT (tx->tx_fmr == NULL); - LASSERT (page_offset < PAGE_SIZE); - LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT))); - LASSERT (npages <= LNET_MAX_IOV); - - rd->rd_addr = 0; - - fmr = ib_fmr_pool_map_phys(net->ibn_fmrpool, tx->tx_pages, - npages, rd->rd_addr); - if (IS_ERR(fmr)) { - CERROR ("Can't map %d pages: %ld\n", npages, PTR_ERR(fmr)); - return PTR_ERR(fmr); - } - - /* If rd is not tx_rd, it's going to get sent to a peer, who will need - * the rkey */ - - rd->rd_key = (rd != tx->tx_rd) ? 
fmr->fmr->rkey : fmr->fmr->lkey; - rd->rd_nob = nob; - - tx->tx_fmr = fmr; - return 0; -} - -int -kiblnd_setup_rd_iov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - int resid; - int fragnob; - struct page *page; - int npages; - unsigned long page_offset; - unsigned long vaddr; - - LASSERT (nob > 0); - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = ((unsigned long)iov->iov_base) + offset; - - page_offset = vaddr & (PAGE_SIZE - 1); - resid = nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - - page = kiblnd_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR("Can't find page for %lu\n", vaddr); - return -EFAULT; - } - - tx->tx_pages[npages++] = lnet_page2phys(page); - - fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1)); - vaddr += fragnob; - resid -= fragnob; - - } while (resid > 0); - - return kiblnd_map_tx(ni, tx, rd, npages, page_offset, nob); -} - -int -kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - int resid; - int npages; - unsigned long page_offset; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (nkiov <= LNET_MAX_IOV); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - page_offset = kiov->kiov_offset + offset; - - resid = offset + nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - LASSERT (nkiov > 0); - - if ((npages > 0 && kiov->kiov_offset != 0) || - (resid > kiov->kiov_len && - (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) { - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, 
len %d \n", - npages, kiov->kiov_offset, kiov->kiov_len); - - return -EINVAL; - } - - tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page); - resid -= kiov->kiov_len; - kiov++; - nkiov--; - } while (resid > 0); - - return kiblnd_map_tx(ni, tx, rd, npages, page_offset, nob); -} -#endif - -void -kiblnd_check_sends (kib_conn_t *conn) -{ - kib_tx_t *tx; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - int rc; - int consume_cred = 0; - struct ib_send_wr *bad_wrq; - int done; - - /* Don't send anything until after the connection is established */ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CDEBUG(D_NET, "%s too soon\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - spin_lock(&conn->ibc_lock); - - LASSERT (conn->ibc_nsends_posted <= - *kiblnd_tunables.kib_concurrent_sends); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (kiblnd_send_noop(conn)) { - spin_unlock(&conn->ibc_lock); - - tx = kiblnd_get_idle_tx(ni); - if (tx != NULL) - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - - if (tx != NULL) - kiblnd_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - tx = list_entry(conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_cred = 0; - } else if (!list_empty(&conn->ibc_tx_noops)) { - tx = list_entry(conn->ibc_tx_noops.next, - kib_tx_t, tx_list); - consume_cred = 1; - } else if (!list_empty(&conn->ibc_tx_queue)) { - tx = list_entry(conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_cred = 1; - } else { - /* nothing to send right now */ - break; - } - - LASSERT (tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nwrq > 0 && - tx->tx_nwrq <= 1 + 
IBLND_MAX_RDMA_FRAGS); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE); - - if (conn->ibc_nsends_posted == - *kiblnd_tunables.kib_concurrent_sends) { - /* tx completions outstanding... */ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (consume_cred) { - if (conn->ibc_credits == 0) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; /* NB ibc_tx_queue_nocred checked */ - } - - /* Last credit reserved for NOOP */ - if (conn->ibc_credits == 1 && - tx->tx_msg->ibm_type != IBLND_MSG_NOOP) { - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; /* NB ibc_tx_noops checked */ - } - } - - list_del(&tx->tx_list); - tx->tx_queued = 0; - - /* NB don't drop ibc_lock before bumping tx_sending */ - - if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP && - !kiblnd_send_noop(conn)) { - /* redundant NOOP */ - spin_unlock(&conn->ibc_lock); - kiblnd_tx_done(ni, tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - continue; - } - - kiblnd_pack_msg(ni, tx->tx_msg, conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_cred) - conn->ibc_credits--; - - /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() from - * the first send; hence the ++ rather than = below. 
*/ - tx->tx_sending++; - - list_add (&tx->tx_list, &conn->ibc_active_txs); -#if 0 - { - int i; - - for (i = 0; i < tx->tx_nwrq - 1; i++) { - LASSERT (tx->tx_wrq[i].opcode == IB_WR_RDMA_WRITE); - LASSERT (tx->tx_wrq[i].next == &tx->tx_wrq[i+1]); - LASSERT (tx->tx_wrq[i].sg_list == &tx->tx_sge[i]); - - CDEBUG(D_WARNING, "WORK[%d]: RDMA "LPX64 - " for %d k %x -> "LPX64" k %x\n", i, - tx->tx_wrq[i].sg_list->addr, - tx->tx_wrq[i].sg_list->length, - tx->tx_wrq[i].sg_list->lkey, - tx->tx_wrq[i].wr.rdma.remote_addr, - tx->tx_wrq[i].wr.rdma.rkey); - } - - LASSERT (tx->tx_wrq[i].opcode == IB_WR_SEND); - LASSERT (tx->tx_wrq[i].next == NULL); - LASSERT (tx->tx_wrq[i].sg_list == &tx->tx_sge[i]); - - CDEBUG(D_WARNING, "WORK[%d]: SEND "LPX64" for %d k %x\n", i, - tx->tx_wrq[i].sg_list->addr, - tx->tx_wrq[i].sg_list->length, - tx->tx_wrq[i].sg_list->lkey); - } -#endif - /* I'm still holding ibc_lock! */ - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) - rc = -ECONNABORTED; - else - rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &bad_wrq); - - conn->ibc_last_send = jiffies; - - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_cred) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CERROR("Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG(D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kiblnd_close_conn(conn, rc); - - if (done) - kiblnd_tx_done(ni, tx); - return; - } - } - - spin_unlock(&conn->ibc_lock); -} - -void -kiblnd_tx_complete (kib_tx_t *tx, int status) -{ - int failed = (status != IB_WC_SUCCESS); - kib_conn_t *conn 
= tx->tx_conn; - int idle; - - LASSERT (tx->tx_sending > 0); - - if (failed) { - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CDEBUG(D_NETERROR, "Tx -> %s cookie "LPX64 - "sending %d waiting %d: failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_cookie, tx->tx_sending, tx->tx_waiting, - status); - - kiblnd_close_conn(conn, -EIO); - } else { - kiblnd_peer_alive(conn->ibc_peer); - } - - spin_lock(&conn->ibc_lock); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. */ - - tx->tx_sending--; - conn->ibc_nsends_posted--; - - if (failed) { - tx->tx_waiting = 0; /* don't wait for peer */ - tx->tx_status = -EIO; - } - - idle = (tx->tx_sending == 0) && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kiblnd_conn_addref(conn); /* 1 ref for me.... */ - - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx); - - kiblnd_check_sends(conn); - - kiblnd_conn_decref(conn); /* ...until here */ -} - -void -kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) -{ - kib_net_t *net = ni->ni_data; - struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; - struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - - LASSERT (net != NULL); - LASSERT (tx->tx_nwrq >= 0); - LASSERT (tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1); - LASSERT (nob <= IBLND_MSG_SIZE); - - kiblnd_init_msg(tx->tx_msg, type, body_nob); - - sge->addr = tx->tx_msgaddr; - sge->lkey = net->ibn_dev->ibd_mr->lkey; - sge->length = nob; - - memset(wrq, 0, sizeof(*wrq)); - - wrq->next = NULL; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_SEND; - wrq->send_flags = IB_SEND_SIGNALED; - - tx->tx_nwrq++; -} - -int -kiblnd_init_rdma (lnet_ni_t *ni, kib_tx_t *tx, 
int type, - int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie) -{ - kib_msg_t *ibmsg = tx->tx_msg; - kib_rdma_desc_t *srcrd = tx->tx_rd; - struct ib_sge *sge = &tx->tx_sge[0]; - struct ib_send_wr *wrq = &tx->tx_wrq[0]; - int rc = nob; - -#if IBLND_MAP_ON_DEMAND - LASSERT (!in_interrupt()); - LASSERT (tx->tx_nwrq == 0); - LASSERT (type == IBLND_MSG_GET_DONE || - type == IBLND_MSG_PUT_DONE); - - sge->addr = srcrd->rd_addr; - sge->lkey = srcrd->rd_key; - sge->length = nob; - - wrq = &tx->tx_wrq[0]; - - wrq->next = &tx->tx_wrq[1]; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_RDMA_WRITE; - wrq->send_flags = 0; - - wrq->wr.rdma.remote_addr = dstrd->rd_addr; - wrq->wr.rdma.rkey = dstrd->rd_key; - - tx->tx_nwrq = 1; -#else - /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */ - int resid = nob; - kib_rdma_frag_t *srcfrag; - int srcidx; - kib_rdma_frag_t *dstfrag; - int dstidx; - int wrknob; - - LASSERT (!in_interrupt()); - LASSERT (tx->tx_nwrq == 0); - LASSERT (type == IBLND_MSG_GET_DONE || - type == IBLND_MSG_PUT_DONE); - - srcidx = dstidx = 0; - srcfrag = &srcrd->rd_frags[0]; - dstfrag = &dstrd->rd_frags[0]; - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrags) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrags) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq == IBLND_MAX_RDMA_FRAGS) { - CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n", - srcidx, srcrd->rd_nfrags, - dstidx, dstrd->rd_nfrags); - rc = -EMSGSIZE; - break; - } - - wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid); - - sge = &tx->tx_sge[tx->tx_nwrq]; - sge->addr = srcfrag->rf_addr; - sge->length = wrknob; - sge->lkey = srcrd->rd_key; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - - wrq->next = wrq + 1; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->sg_list = sge; - wrq->num_sge = 1; 
- wrq->opcode = IB_WR_RDMA_WRITE; - wrq->send_flags = 0; - - wrq->wr.rdma.remote_addr = dstfrag->rf_addr; - wrq->wr.rdma.rkey = dstrd->rd_key; - - wrq++; - sge++; - - resid -= wrknob; - if (wrknob < srcfrag->rf_nob) { - srcfrag->rf_nob -= wrknob; - srcfrag->rf_addr += wrknob; - } else { - srcfrag++; - srcidx++; - } - - if (wrknob < dstfrag->rf_nob) { - dstfrag->rf_nob -= wrknob; - dstfrag->rf_addr += wrknob; - } else { - dstfrag++; - dstidx++; - } - - tx->tx_nwrq++; - } - - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; -#endif - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kiblnd_init_tx_msg(ni, tx, type, sizeof (kib_completion_msg_t)); - - return rc; -} - -void -kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nwrq > 0); /* work items set up */ - LASSERT (!tx->tx_queued); /* not queued for sending already */ - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + (*kiblnd_tunables.kib_timeout * HZ); - - if (tx->tx_conn == NULL) { - kiblnd_conn_addref(conn); - tx->tx_conn = conn; - LASSERT (tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE); - } else { - /* PUT_DONE first attached to conn as a PUT_REQ */ - LASSERT (tx->tx_conn == conn); - LASSERT (tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE); - } - - switch (tx->tx_msg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_PUT_REQ: - case IBLND_MSG_GET_REQ: - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_ACK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - q = &conn->ibc_tx_queue_nocred; - break; - - case IBLND_MSG_NOOP: - q = &conn->ibc_tx_noops; - break; - - case IBLND_MSG_IMMEDIATE: - q = &conn->ibc_tx_queue; - break; - } - - list_add_tail(&tx->tx_list, q); -} - -void -kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - spin_lock(&conn->ibc_lock); - kiblnd_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - - kiblnd_check_sends(conn); -} 
- -void -kiblnd_connect_peer (kib_peer_t *peer) -{ - struct rdma_cm_id *cmid; - kib_net_t *net = peer->ibp_ni->ni_data; - struct sockaddr_in srcaddr; - struct sockaddr_in dstaddr; - int rc; - - LASSERT (net != NULL); - LASSERT (peer->ibp_connecting > 0); - - cmid = rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP); - if (IS_ERR(cmid)) { - CERROR("Can't create CMID for %s: %ld\n", - libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid)); - rc = PTR_ERR(cmid); - goto failed; - } - - memset(&srcaddr, 0, sizeof(srcaddr)); - srcaddr.sin_family = AF_INET; - srcaddr.sin_addr.s_addr = htonl(net->ibn_dev->ibd_ifip); - - memset(&dstaddr, 0, sizeof(dstaddr)); - dstaddr.sin_family = AF_INET; - dstaddr.sin_port = htons(*kiblnd_tunables.kib_service); - dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid)); - - kiblnd_peer_addref(peer); /* cmid's ref */ - - rc = rdma_resolve_addr(cmid, - (struct sockaddr *)&srcaddr, - (struct sockaddr *)&dstaddr, - *kiblnd_tunables.kib_timeout * 1000); - if (rc == 0) - return; - - /* Can't initiate address resolution: */ - CERROR("Can't resolve addr for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - - kiblnd_peer_decref(peer); /* cmid's ref */ - rdma_destroy_id(cmid); - failed: - kiblnd_peer_connect_failed(peer, 1, rc); -} - -void -kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_peer_t *peer2; - kib_conn_t *conn; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - int rc; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nwrq > 0); /* work items have been set up */ - - /* First time, just use a read lock since I expect to find my peer - * connected */ - read_lock_irqsave(g_lock, flags); - - peer = kiblnd_find_peer_locked(nid); - if (peer != NULL && !list_empty(&peer->ibp_conns)) { - /* Found a peer with an established connection */ - 
conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - read_unlock_irqrestore(g_lock, flags); - - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - return; - } - - read_unlock(g_lock); - /* Re-try with a write lock */ - write_lock(g_lock); - - peer = kiblnd_find_peer_locked(nid); - if (peer != NULL) { - if (list_empty(&peer->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0); - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - write_unlock_irqrestore(g_lock, flags); - - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - return; - } - - write_unlock_irqrestore(g_lock, flags); - - /* Allocate a peer ready to add to the peer table and retry */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc != 0) { - CERROR("Can't create peer %s\n", libcfs_nid2str(nid)); - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kiblnd_tx_done(ni, tx); - return; - } - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2 != NULL) { - if (list_empty(&peer2->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT (peer2->ibp_connecting != 0 || - peer2->ibp_accepting != 0); - list_add_tail (&tx->tx_list, &peer2->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer2); - kiblnd_conn_addref(conn); /* 1 ref for me... 
*/ - - write_unlock_irqrestore(g_lock, flags); - - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - - kiblnd_peer_decref(peer); - return; - } - - /* Brand new peer */ - LASSERT (peer->ibp_connecting == 0); - peer->ibp_connecting = 1; - - /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0); - - list_add_tail(&tx->tx_list, &peer->ibp_tx_queue); - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - - kiblnd_connect_peer(peer); - kiblnd_peer_decref(peer); -} - -int -kiblnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... */ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? 
*/ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can allocate txd for GET to %s: \n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kiblnd_setup_rd_iov(ni, tx, - &ibmsg->ibm_u.get.ibgm_rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kiblnd_setup_rd_kiov(ni, tx, - &ibmsg->ibm_u.get.ibgm_rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } -#if IBLND_MAP_ON_DEMAND - nob = sizeof(kib_get_msg_t); -#else - nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[tx->tx_nfrags]); -#endif - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? 
"PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (payload_kiov == NULL) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc != 0) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(kib_putreq_msg_t)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBLND_MSG_SIZE); - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_iov, - payload_offset, payload_nob); - - nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]); - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; -} - -void -kiblnd_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) -{ - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - 
lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kib_tx_t *tx; - int rc; - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (nob == 0) - rc = 0; - else if (kiov == NULL) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - niov, iov, offset, nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - niov, kiov, offset, nob); - - if (rc != 0) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kiblnd_init_rdma(ni, tx, IBLND_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (nob == 0) { - /* No RDMA: local completion may happen now! */ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it - * completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kiblnd_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kiblnd_tx_done(ni, tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kiblnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - kib_tx_t *tx; - kib_msg_t *txmsg; - int nob; - int post_credit = IBLND_POSTRX_PEER_CREDIT; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_IMMEDIATE: - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - 
libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - IBLND_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - IBLND_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - lnet_finalize (ni, lntmsg, 0); - break; - - case IBLND_MSG_PUT_REQ: - if (mlen == 0) { - lnet_finalize(ni, lntmsg, 0); - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - if (kiov == NULL) - rc = kiblnd_setup_rd_iov(ni, tx, - &txmsg->ibm_u.putack.ibpam_rd, - niov, iov, offset, mlen); - else - rc = kiblnd_setup_rd_kiov(ni, tx, - &txmsg->ibm_u.putack.ibpam_rd, - niov, kiov, offset, mlen); - if (rc != 0) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_tx_done(ni, tx); - /* tell peer it's over */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; -#if IBLND_MAP_ON_DEMAND - nob = sizeof(kib_putack_msg_t); -#else - nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[tx->tx_nfrags]); -#endif - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kiblnd_queue_tx(tx, conn); - - /* reposted buffer reserved for PUT_DONE */ - post_credit = IBLND_POSTRX_NO_CREDIT; - break; - - case IBLND_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Optimized GET; 
RDMA lntmsg's payload */ - kiblnd_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kiblnd_post_rx(rx, post_credit); - return rc; -} - -int -kiblnd_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kiblnd_data.kib_nthreads); - return (0); -} - -void -kiblnd_thread_fini (void) -{ - atomic_dec (&kiblnd_data.kib_nthreads); -} - -void -kiblnd_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kiblnd_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(peer->ibp_ni, - peer->ibp_nid, 0, last_alive); -} - -void -kiblnd_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immediate housekeeping. 'error' is zero for a - * normal shutdown which can happen only after the connection has been - * established. If the connection is established, schedule the - * connection to be finished off by the connd. Otherwise the connd is - * already dealing with it (either to set it up or tear it down). 
- * Caller holds kib_global_lock exclusively in irq context */ - unsigned long flags; - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (error != 0 || conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - if (error != 0 && conn->ibc_comms_error == 0) - conn->ibc_comms_error = error; - - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) - return; /* already being handled */ - - if (error == 0 && - list_empty(&conn->ibc_tx_noops) && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s\n", - libcfs_nid2str(peer->ibp_nid)); - } else { - CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s%s\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? 
"" : "(waiting)"); - } - - list_del (&conn->ibc_list); - /* connd (see below) takes over ibc_list's ref */ - - if (list_empty (&peer->ibp_conns) && /* no more conns */ - kiblnd_peer_active(peer)) { /* still in peer table */ - kiblnd_unlink_peer_locked(peer); - - /* set/clear error on last conn */ - peer->ibp_error = conn->ibc_comms_error; - } - - kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING); - - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - - list_add_tail (&conn->ibc_list, &kiblnd_data.kib_connd_conns); - wake_up (&kiblnd_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); -} - -void -kiblnd_close_conn (kib_conn_t *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - kiblnd_close_conn_locked(conn, error); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -void -kiblnd_handle_early_rxs(kib_conn_t *conn) -{ - unsigned long flags; - kib_rx_t *rx; - - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - while (!list_empty(&conn->ibc_early_rxs)) { - rx = list_entry(conn->ibc_early_rxs.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_handle_rx(rx); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -void -kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || - tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_queued = 0; - 
tx->tx_waiting = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, &zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kiblnd_txlist_done(conn->ibc_peer->ibp_ni, - &zombies, -ECONNABORTED); -} - -void -kiblnd_finalise_conn (kib_conn_t *conn) -{ - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state > IBLND_CONN_INIT); - - kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED); - - /* abort_receives moves QP state to IB_QPS_ERR. This is only required - * for connections that didn't get as far as being connected, because - * rdma_disconnect() does this for free. */ - kiblnd_abort_receives(conn); - - /* Complete all tx descs not waiting for sends to complete. - * NB we should be safe from RDMA now that the QP has changed state */ - - kiblnd_abort_txs(conn, &conn->ibc_tx_noops); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kiblnd_abort_txs(conn, &conn->ibc_active_txs); - - kiblnd_handle_early_rxs(conn); -} - -void -kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - LASSERT (error != 0); - LASSERT (!in_interrupt()); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (active) { - LASSERT (peer->ibp_connecting > 0); - peer->ibp_connecting--; - } else { - LASSERT (peer->ibp_accepting > 0); - peer->ibp_accepting--; - } - - if (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0) { - /* another connection attempt under way... 
*/ - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return; - } - - if (list_empty(&peer->ibp_conns)) { - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kiblnd_peer_active(peer)) - kiblnd_unlink_peer_locked(peer); - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT (list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_peer_notify(peer); - - if (list_empty (&zombies)) - return; - - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH); -} - -void -kiblnd_connreq_done(kib_conn_t *conn, int status) -{ - struct list_head txs; - - kib_peer_t *peer = conn->ibc_peer; - int active; - unsigned long flags; - kib_tx_t *tx; - - active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - CDEBUG(D_NET,"%s: %d, %d\n", libcfs_nid2str(peer->ibp_nid), - active, status); - - LASSERT (!in_interrupt()); - LASSERT ((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT && - peer->ibp_connecting > 0) || - (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT && - peer->ibp_accepting > 0)); - - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - conn->ibc_connvars = NULL; - - if (status != 0) { - /* failed to establish connection */ - kiblnd_peer_connect_failed(peer, active, status); - kiblnd_finalise_conn(conn); - return; - } - - /* connection established */ - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - conn->ibc_last_send = jiffies; - kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED); - kiblnd_peer_alive(peer); - - /* Add conn to peer's list and nuke any dangling conns from a different - * peer instance... 
*/ - kiblnd_conn_addref(conn); /* +1 ref for ibc_list */ - list_add(&conn->ibc_list, &peer->ibp_conns); - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - kiblnd_close_stale_conns_locked(peer, conn->ibc_incarnation); - - /* grab pending txs while I have the lock */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (!kiblnd_peer_active(peer) || /* peer has been deleted */ - conn->ibc_comms_error != 0) { /* error has happened already */ - lnet_ni_t *ni = peer->ibp_ni; - - /* start to shut down connection */ - kiblnd_close_conn_locked(conn, -ECONNABORTED); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_txlist_done(ni, &txs, -ECONNABORTED); - - return; - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* Schedule blocked txs */ - spin_lock (&conn->ibc_lock); - while (!list_empty (&txs)) { - tx = list_entry (txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - kiblnd_queue_tx_locked(tx, conn); - } - spin_unlock (&conn->ibc_lock); - - kiblnd_check_sends(conn); - - /* schedule blocked rxs */ - kiblnd_handle_early_rxs(conn); -} - -void -kiblnd_reject(struct rdma_cm_id *cmid, int why) -{ - int rc; - kib_rej_t rej = {.ibr_magic = IBLND_MSG_MAGIC, - .ibr_version = IBLND_MSG_VERSION, - .ibr_why = why}; - - rc = rdma_reject(cmid, &rej, sizeof(rej)); - - if (rc != 0) - CWARN("Error %d sending reject\n", rc); -} - -int -kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob) -{ - kib_msg_t *ackmsg; - kib_msg_t *reqmsg = priv; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - struct rdma_conn_param cp; - unsigned long flags; - lnet_ni_t *ni = NULL; - kib_dev_t *ibdev; - kib_peer_t *peer; - kib_peer_t *peer2; - kib_conn_t *conn; - lnet_nid_t nid; - int rc; - int rej = IBLND_REJECT_FATAL; - - LASSERT (!in_interrupt()); - - /* cmid inherits 'context' from the corresponding listener id */ - ibdev = (kib_dev_t *)cmid->context; - LASSERT (ibdev != 
NULL); - - if (priv_nob < offsetof(kib_msg_t, ibm_type)) { - CERROR("Short connection request\n"); - goto failed; - } - - if (reqmsg->ibm_magic == LNET_PROTO_MAGIC || - reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC) || - (reqmsg->ibm_magic == IBLND_MSG_MAGIC && - reqmsg->ibm_version != IBLND_MSG_VERSION) || - (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION))) { - /* Future protocol version compatibility support! If the - * o2iblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will - * negotiate a protocol version. I trap this here to avoid - * console errors; the reject tells the peer which protocol I - * speak. */ - goto failed; - } - - rc = kiblnd_unpack_msg(reqmsg, priv_nob); - if (rc != 0) { - CERROR("Can't parse connection request: %d\n", rc); - goto failed; - } - - nid = reqmsg->ibm_srcnid; - - if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - reqmsg->ibm_type, libcfs_nid2str(nid)); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_queue_depth != IBLND_MSG_QUEUE_SIZE) { - CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) { - CERROR("Can't accept %s: incompatible max_frags %d (%d wanted)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_max_frags, - IBLND_MAX_RDMA_FRAGS); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - goto failed; - } - - ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid)); - if (ni == NULL || /* no matching net */ - ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! 
*/ - ((kib_net_t*)ni->ni_data)->ibn_dev != ibdev) { /* wrong device */ - CERROR("Can't accept %s: bad dst nid %s\n", - libcfs_nid2str(nid), - libcfs_nid2str(reqmsg->ibm_dstnid)); - - goto failed; - } - - /* assume 'nid' is a new peer; create */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc != 0) { - CERROR("Can't create peer for %s\n", libcfs_nid2str(nid)); - rej = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2 != NULL) { - /* tie-break connection race in favour of the higher NID */ - if (peer2->ibp_connecting != 0 && - nid < ni->ni_nid) { - write_unlock_irqrestore(g_lock, flags); - - CWARN("Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kiblnd_peer_decref(peer); - rej = IBLND_REJECT_CONN_RACE; - goto failed; - } - - peer2->ibp_accepting++; - kiblnd_peer_addref(peer2); - - write_unlock_irqrestore(g_lock, flags); - kiblnd_peer_decref(peer); - peer = peer2; - } else { - /* Brand new peer */ - LASSERT (peer->ibp_accepting == 0); - peer->ibp_accepting = 1; - - /* I have a ref on ni that prevents it being shutdown */ - LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0); - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - } - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT); - if (conn == NULL) { - kiblnd_peer_connect_failed(peer, 0, -ENOMEM); - kiblnd_peer_decref(peer); - rej = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - /* conn now "owns" cmid, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. 
*/ - - conn->ibc_incarnation = reqmsg->ibm_srcstamp; - conn->ibc_credits = IBLND_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBLND_RX_MSGS); - - ackmsg = &conn->ibc_connvars->cv_msg; - memset(ackmsg, 0, sizeof(*ackmsg)); - - kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, - sizeof(ackmsg->ibm_u.connparams)); - ackmsg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE; - ackmsg->ibm_u.connparams.ibcp_max_frags = IBLND_MAX_RDMA_FRAGS; - ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - kiblnd_pack_msg(ni, ackmsg, 0, nid, reqmsg->ibm_srcstamp); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = ackmsg; - cp.private_data_len = ackmsg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); - - rc = rdma_accept(cmid, &cp); - if (rc != 0) { - CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); - kiblnd_reject(cmid, IBLND_REJECT_FATAL); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - lnet_ni_decref(ni); - return 0; - - failed: - if (ni != NULL) - lnet_ni_decref(ni); - - kiblnd_reject(cmid, rej); - return -ECONNREFUSED; -} - -void -kiblnd_reconnect (kib_conn_t *conn, char *why) -{ - kib_peer_t *peer = conn->ibc_peer; - int retry = 0; - unsigned long flags; - - LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */ - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - /* retry connection if it's still needed and no other connection - * attempts (active or passive) are in progress */ - if (!list_empty(&peer->ibp_tx_queue) && - peer->ibp_connecting == 1 && - peer->ibp_accepting == 0) { - retry = 1; - peer->ibp_connecting++; - } - - 
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (retry) { - CDEBUG(D_NETERROR, "%s: retrying (%s)\n", - libcfs_nid2str(peer->ibp_nid), why); - kiblnd_connect_peer(peer); - } -} - -void -kiblnd_rejected (kib_conn_t *conn, int reason, void *priv, int priv_nob) -{ - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - switch (reason) { - case IB_CM_REJ_STALE_CONN: - kiblnd_reconnect(conn, "stale"); - break; - - case IB_CM_REJ_CONSUMER_DEFINED: - if (priv_nob >= sizeof(kib_rej_t)) { - kib_rej_t *rej = priv; - - if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) || - rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) { - __swab32s(&rej->ibr_magic); - __swab16s(&rej->ibr_version); - } - - if (rej->ibr_magic != IBLND_MSG_MAGIC && - rej->ibr_magic != LNET_PROTO_MAGIC) { - CERROR("%s rejected: consumer defined fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - } - - if (rej->ibr_version != IBLND_MSG_VERSION) { - CERROR("%s rejected: o2iblnd version %d error\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_version); - break; - } - - switch (rej->ibr_why) { - case IBLND_REJECT_CONN_RACE: - kiblnd_reconnect(conn, "conn race"); - break; - - case IBLND_REJECT_NO_RESOURCES: - CERROR("%s rejected: o2iblnd no resources\n", - libcfs_nid2str(peer->ibp_nid)); - break; - case IBLND_REJECT_FATAL: - CERROR("%s rejected: o2iblnd fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - default: - CERROR("%s rejected: o2iblnd reason %d\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_why); - break; - } - break; - } - /* fall through */ - default: - CDEBUG(D_NETERROR, "%s rejected: reason %d, size %d\n", - libcfs_nid2str(peer->ibp_nid), reason, priv_nob); - break; - } - - kiblnd_connreq_done(conn, -ECONNREFUSED); -} - -void -kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob) -{ - kib_peer_t *peer = conn->ibc_peer; - lnet_ni_t *ni = peer->ibp_ni; - kib_net_t *net = ni->ni_data; 
- kib_msg_t *msg = priv; - int rc = kiblnd_unpack_msg(msg, priv_nob); - unsigned long flags; - - LASSERT (net != NULL); - - if (rc != 0) { - CERROR("Can't unpack connack from %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - goto failed; - } - - if (msg->ibm_type != IBLND_MSG_CONNACK) { - CERROR("Unexpected message %d from %s\n", - msg->ibm_type, libcfs_nid2str(peer->ibp_nid)); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBLND_MSG_QUEUE_SIZE) { - CERROR("%s has incompatible queue depth %d(%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) { - CERROR("%s has incompatible max_frags %d (%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_frags, - IBLND_MAX_RDMA_FRAGS); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("%s max message size %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - rc = -EPROTO; - goto failed; - } - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (msg->ibm_dstnid == ni->ni_nid && - msg->ibm_dststamp == net->ibn_incarnation) - rc = 0; - else - rc = -ESTALE; - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Stale connection reply from %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed; - } - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBLND_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBLND_RX_MSGS); - - kiblnd_connreq_done(conn, 0); - return; - - failed: - /* NB My QP has already established itself, so I handle anything going - * wrong here by setting ibc_comms_error. 
- * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then - * immediately tears it down. */ - - LASSERT (rc != 0); - conn->ibc_comms_error = rc; - kiblnd_connreq_done(conn, 0); -} - -int -kiblnd_active_connect (struct rdma_cm_id *cmid) -{ - kib_peer_t *peer = (kib_peer_t *)cmid->context; - kib_conn_t *conn; - kib_msg_t *msg; - struct rdma_conn_param cp; - int rc; - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT); - if (conn == NULL) { - kiblnd_peer_connect_failed(peer, 1, -ENOMEM); - kiblnd_peer_decref(peer); /* lose cmid's ref */ - return -ENOMEM; - } - - /* conn "owns" cmid now, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. conn also takes over cmid's ref - * on peer */ - - msg = &conn->ibc_connvars->cv_msg; - - memset(msg, 0, sizeof(*msg)); - kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); - msg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE; - msg->ibm_u.connparams.ibcp_max_frags = IBLND_MAX_RDMA_FRAGS; - msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - kiblnd_pack_msg(peer->ibp_ni, msg, 0, peer->ibp_nid, 0); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = msg; - cp.private_data_len = msg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - LASSERT(cmid->context == (void *)conn); - LASSERT(conn->ibc_cmid == cmid); - - rc = rdma_connect(cmid, &cp); - if (rc != 0) { - CERROR("Can't connect to %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - return 0; -} - -int -kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) -{ - kib_peer_t *peer; - kib_conn_t *conn; - int rc; - - switch (event->event) { - default: - LBUG(); - - case RDMA_CM_EVENT_CONNECT_REQUEST: - /* destroy cmid 
on failure */ - rc = kiblnd_passive_connect(cmid, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - CDEBUG(D_NET, "connreq: %d\n", rc); - return rc; - - case RDMA_CM_EVENT_ADDR_ERROR: - peer = (kib_peer_t *)cmid->context; - CDEBUG(D_NETERROR, "%s: ADDR ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_ADDR_RESOLVED: - peer = (kib_peer_t *)cmid->context; - - CDEBUG(D_NET,"%s Addr resolved: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (event->status != 0) { - CDEBUG(D_NETERROR, "Can't resolve address for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - rc = event->status; - } else { - rc = rdma_resolve_route( - cmid, *kiblnd_tunables.kib_timeout * 1000); - if (rc == 0) - return 0; - /* Can't initiate route resolution */ - CERROR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - } - kiblnd_peer_connect_failed(peer, 1, rc); - kiblnd_peer_decref(peer); - return rc; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_ERROR: - peer = (kib_peer_t *)cmid->context; - CDEBUG(D_NETERROR, "%s: ROUTE ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_RESOLVED: - peer = (kib_peer_t *)cmid->context; - CDEBUG(D_NET,"%s Route resolved: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (event->status == 0) - return kiblnd_active_connect(cmid); - - CDEBUG(D_NETERROR, "Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, event->status); - kiblnd_peer_decref(peer); - return event->status; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_UNREACHABLE: - conn = (kib_conn_t *)cmid->context; 
- LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CDEBUG(D_NETERROR, "%s: UNREACHABLE %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENETDOWN); - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_CONNECT_ERROR: - conn = (kib_conn_t *)cmid->context; - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CDEBUG(D_NETERROR, "%s: CONNECT ERROR %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENOTCONN); - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_REJECTED: - conn = (kib_conn_t *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CERROR ("%s: REJECTED %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - event->status); - kiblnd_connreq_done(conn, -ECONNRESET); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - kiblnd_rejected(conn, event->status, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - break; - } - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_ESTABLISHED: - conn = (kib_conn_t *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CDEBUG(D_NET, "ESTABLISHED (passive): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, 0); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - CDEBUG(D_NET, "ESTABLISHED(active): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_check_connreply(conn, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - break; - } - /* net keeps its ref on conn! 
*/ - return 0; - - case RDMA_CM_EVENT_DISCONNECTED: - conn = (kib_conn_t *)cmid->context; - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CERROR("%s DISCONNECTED\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, -ECONNRESET); - } else { - kiblnd_close_conn(conn, 0); - } - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_DEVICE_REMOVAL: - LCONSOLE_ERROR_MSG(0x131, - "Received notification of device removal\n" - "Please shutdown LNET to allow this to proceed\n"); - /* Can't remove network from underneath LNET for now, so I have - * to ignore this */ - return 0; - } -} - -int -kiblnd_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - int timed_out = 0; - - spin_lock(&conn->ibc_lock); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs != &conn->ibc_active_txs) { - LASSERT (tx->tx_queued); - } else { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - break; - } - } - - spin_unlock(&conn->ibc_lock); - return timed_out; -} - -int -kiblnd_conn_timed_out (kib_conn_t *conn) -{ - return kiblnd_check_txs(conn, &conn->ibc_tx_queue) || - kiblnd_check_txs(conn, &conn->ibc_tx_noops) || - kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kiblnd_check_txs(conn, &conn->ibc_active_txs); -} - -void -kiblnd_check_conns (int idx) -{ - struct list_head *peers = &kiblnd_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... 
*/ - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBLND_CONN_ESTABLISHED); - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - kiblnd_check_sends(conn); - - if (!kiblnd_conn_timed_out(conn)) - continue; - - /* Handle timeout by closing the whole connection. We - * can only be sure RDMA activity has ceased once the - * QP has been modified. */ - - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kiblnd_close_conn(conn, -ETIMEDOUT); - kiblnd_conn_decref(conn); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -void -kiblnd_disconnect_conn (kib_conn_t *conn) -{ - LASSERT (!in_interrupt()); - LASSERT (current == kiblnd_data.kib_connd); - LASSERT (conn->ibc_state == IBLND_CONN_CLOSING); - - rdma_disconnect(conn->ibc_cmid); - kiblnd_finalise_conn(conn); - - kiblnd_peer_notify(conn->ibc_peer); -} - -int -kiblnd_connd (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_conn_t *conn; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kiblnd_connd"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - kiblnd_data.kib_connd = current; - - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - - while (!kiblnd_data.kib_shutdown) { - - dropped_lock = 0; - - if (!list_empty (&kiblnd_data.kib_connd_zombies)) { - conn = list_entry (kiblnd_data.kib_connd_zombies.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - 
spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - dropped_lock = 1; - - kiblnd_destroy_conn(conn); - - spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags); - } - - if (!list_empty (&kiblnd_data.kib_connd_conns)) { - conn = list_entry (kiblnd_data.kib_connd_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - dropped_lock = 1; - - kiblnd_disconnect_conn(conn); - kiblnd_conn_decref(conn); - - spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags); - } - - /* careful with the jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kiblnd_data.kib_peer_hash_size; - - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); - dropped_lock = 1; - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
*/ - - if (*kiblnd_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kiblnd_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kiblnd_check_conns(peer_index); - peer_index = (peer_index + 1) % - kiblnd_data.kib_peer_hash_size; - } - - deadline += p * HZ; - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - } - - if (dropped_lock) - continue; - - /* Nothing to do for 'timeout' */ - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kiblnd_data.kib_connd_waitq, &wait); - spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kiblnd_data.kib_connd_waitq, &wait); - spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - - kiblnd_thread_fini(); - return (0); -} - -void -kiblnd_qp_event(struct ib_event *event, void *arg) -{ - kib_conn_t *conn = arg; - - switch (event->event) { - case IB_EVENT_COMM_EST: - CDEBUG(D_NET, "%s established\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - - default: - CERROR("%s: Async QP event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); - return; - } -} - -void -kiblnd_complete (struct ib_wc *wc) -{ - switch (kiblnd_wreqid2type(wc->wr_id)) { - default: - LBUG(); - - case IBLND_WID_RDMA: - /* We only get RDMA completion notification if it fails. All - * subsequent work items, including the final SEND will fail - * too. 
However we can't print out any more info about the - * failing RDMA because 'tx' might be back on the idle list or - * even reused already if we didn't manage to post all our work - * items */ - CDEBUG(D_NETERROR, "RDMA (tx: %p) failed: %d\n", - kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_TX: - kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_RX: - kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status, - wc->byte_len); - return; - } -} - -void -kiblnd_cq_completion (struct ib_cq *cq, void *arg) -{ - /* NB I'm not allowed to schedule this conn once its refcount has - * reached 0. Since fundamentally I'm racing with scheduler threads - * consuming my CQ I could be called after all completions have - * occurred. But in this case, ibc_nrx == 0 && ibc_nsends_posted == 0 - * and this CQ is about to be destroyed so I NOOP. */ - kib_conn_t *conn = (kib_conn_t *)arg; - unsigned long flags; - - LASSERT (cq == conn->ibc_cq); - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - - conn->ibc_ready = 1; - - if (!conn->ibc_scheduled && - (conn->ibc_nrx > 0 || - conn->ibc_nsends_posted > 0)) { - kiblnd_conn_addref(conn); /* +1 ref for sched_conns */ - conn->ibc_scheduled = 1; - list_add_tail(&conn->ibc_sched_list, - &kiblnd_data.kib_sched_conns); - wake_up(&kiblnd_data.kib_sched_waitq); - } - - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); -} - -void -kiblnd_cq_event(struct ib_event *event, void *arg) -{ - kib_conn_t *conn = arg; - - CERROR("%s: async CQ event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); -} - -int -kiblnd_scheduler(void *arg) -{ - long id = (long)arg; - wait_queue_t wait; - char name[16]; - unsigned long flags; - kib_conn_t *conn; - struct ib_wc wc; - int rc; - int did_something; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kiblnd_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); 
- - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - - while (!kiblnd_data.kib_shutdown) { - if (busy_loops++ >= IBLND_RESCHED) { - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - } - - did_something = 0; - - if (!list_empty(&kiblnd_data.kib_sched_conns)) { - conn = list_entry(kiblnd_data.kib_sched_conns.next, - kib_conn_t, ibc_sched_list); - /* take over kib_sched_conns' ref on conn... */ - LASSERT(conn->ibc_scheduled); - list_del(&conn->ibc_sched_list); - conn->ibc_ready = 0; - - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - if (rc == 0) { - rc = ib_req_notify_cq(conn->ibc_cq, - IB_CQ_NEXT_COMP); - LASSERT (rc >= 0); - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - } - - LASSERT (rc >= 0); - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, - flags); - - if (rc != 0 || conn->ibc_ready) { - /* There may be another completion waiting; get - * another scheduler to check while I handle - * this one... 
*/ - kiblnd_conn_addref(conn); /* +1 ref for sched_conns */ - list_add_tail(&conn->ibc_sched_list, - &kiblnd_data.kib_sched_conns); - wake_up(&kiblnd_data.kib_sched_waitq); - } else { - conn->ibc_scheduled = 0; - } - - if (rc != 0) { - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - kiblnd_complete(&wc); - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, - flags); - } - - kiblnd_conn_decref(conn); /* ...drop my ref from above */ - did_something = 1; - } - - if (did_something) - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kiblnd_data.kib_sched_waitq, &wait); - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); - - schedule(); - busy_loops = 0; - - remove_wait_queue(&kiblnd_data.kib_sched_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - } - - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); - - kiblnd_thread_fini(); - return (0); -} diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c deleted file mode 100644 index ce65801a2dfea9e984ac7690c9bfdabef6bff014..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ /dev/null @@ -1,315 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "o2iblnd.h" - -static int service = 987; -CFS_MODULE_PARM(service, "i", int, 0444, - "service number (within RDMA_PS_TCP)"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 64; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static char *ipif_name = "ib0"; -CFS_MODULE_PARM(ipif_name, "s", charp, 0444, - "IPoIB interface name"); - -static int retry_count = 5; -CFS_MODULE_PARM(retry_count, "i", int, 0644, - "Retransmissions when no ACK received"); - -static int rnr_retry_count = 6; -CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644, - "RNR retransmissions"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in seconds before sending a keepalive"); - -static int ib_mtu = 0; -CFS_MODULE_PARM(ib_mtu, "i", int, 0444, - "IB MTU 256/512/1024/2048/4096"); - -#if IBLND_MAP_ON_DEMAND -static int concurrent_sends = IBLND_RX_MSGS; -#else -static int concurrent_sends = IBLND_MSG_QUEUE_SIZE; -#endif -CFS_MODULE_PARM(concurrent_sends, "i", int, 0444, - "send work-queue sizing"); - -#if IBLND_MAP_ON_DEMAND -static int fmr_pool_size = 512; -CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444, - "size of the fmr pool (>= ntx)"); - -static int fmr_flush_trigger = 384; -CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444, - "# dirty FMRs that triggers pool flush"); - -static int fmr_cache = 1; -CFS_MODULE_PARM(fmr_cache, "i", int, 
0444, - "non-zero to enable FMR caching"); -#endif - -kib_tunables_t kiblnd_tunables = { - .kib_service = &service, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_keepalive = &keepalive, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_default_ipif = &ipif_name, - .kib_retry_count = &retry_count, - .kib_rnr_retry_count = &rnr_retry_count, - .kib_concurrent_sends = &concurrent_sends, - .kib_ib_mtu = &ib_mtu, -#if IBLND_MAP_ON_DEMAND - .kib_fmr_pool_size = &fmr_pool_size, - .kib_fmr_flush_trigger = &fmr_flush_trigger, - .kib_fmr_cache = &fmr_cache, -#endif -}; - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - -static char ipif_basename_space[32]; - -static cfs_sysctl_table_t kiblnd_ctl_table[] = { - { - .ctl_name = 1, - .procname = "service", - .data = &service, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 2, - .procname = "cksum", - .data = &cksum, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 3, - .procname = "timeout", - .data = &timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 5, - .procname = "credits", - .data = &credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "peer_credits", - .data = &peer_credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "ipif_name", - .data = ipif_basename_space, - .maxlen = sizeof(ipif_basename_space), - .mode = 0444, - .proc_handler = &proc_dostring - }, - { - .ctl_name = 8, - .procname = "retry_count", - .data = &retry_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 9, - .procname = 
"rnr_retry_count", - .data = &rnr_retry_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 10, - .procname = "keepalive", - .data = &keepalive, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 11, - .procname = "concurrent_sends", - .data = &concurrent_sends, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 12, - .procname = "ib_mtu", - .data = &ib_mtu, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, -#if IBLND_MAP_ON_DEMAND - { - .ctl_name = 13, - .procname = "fmr_pool_size", - .data = &fmr_pool_size, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 14, - .procname = "fmr_flush_trigger", - .data = &fmr_flush_trigger, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 15, - .procname = "fmr_cache", - .data = &fmr_cache, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, -#endif - {0} -}; - -static cfs_sysctl_table_t kiblnd_top_ctl_table[] = { - { - .ctl_name = 203, - .procname = "o2iblnd", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kiblnd_ctl_table - }, - {0} -}; - -void -kiblnd_initstrtunable(char *space, char *str, int size) -{ - strncpy(space, str, size); - space[size-1] = 0; -} - -void -kiblnd_sysctl_init (void) -{ - kiblnd_initstrtunable(ipif_basename_space, ipif_name, - sizeof(ipif_basename_space)); - - kiblnd_tunables.kib_sysctl = - cfs_register_sysctl_table(kiblnd_top_ctl_table, 0); - - if (kiblnd_tunables.kib_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); -} - -void -kiblnd_sysctl_fini (void) -{ - if (kiblnd_tunables.kib_sysctl != NULL) - cfs_unregister_sysctl_table(kiblnd_tunables.kib_sysctl); -} - -#else - -void -kiblnd_sysctl_init (void) -{ -} - -void -kiblnd_sysctl_fini (void) -{ -} - -#endif - -int -kiblnd_tunables_init (void) -{ 
- kiblnd_sysctl_init(); - - if (*kiblnd_tunables.kib_concurrent_sends > IBLND_RX_MSGS) - *kiblnd_tunables.kib_concurrent_sends = IBLND_RX_MSGS; - if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE) - *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE; - - return 0; -} - -void -kiblnd_tunables_fini (void) -{ - kiblnd_sysctl_fini(); -} - - - diff --git a/lnet/klnds/openiblnd/.cvsignore b/lnet/klnds/openiblnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/openiblnd/Makefile.in b/lnet/klnds/openiblnd/Makefile.in deleted file mode 100644 index 86fa9cd37b94dba5c1657f0614e4f0b1d13e75f7..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kopeniblnd -kopeniblnd-objs := openiblnd.o openiblnd_cb.o openiblnd_modparams.o - -EXTRA_POST_CFLAGS := @OPENIBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/openiblnd/autoMakefile.am b/lnet/klnds/openiblnd/autoMakefile.am deleted file mode 100644 index b4e0fb70aeef8fef8953406350cc3be79936992a..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_OPENIBLND -modulenet_DATA = kopeniblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kopeniblnd-objs:%.o=%.c) openiblnd.h diff --git a/lnet/klnds/openiblnd/openiblnd.c b/lnet/klnds/openiblnd/openiblnd.c deleted file mode 100644 index 73cecc63007f468153e40256260d21939b6935dd..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd.c +++ /dev/null @@ -1,1893 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "openiblnd.h" - -lnd_t the_kiblnd = { -#ifdef USING_TSAPI - .lnd_type = CIBLND, -#else - .lnd_type = OPENIBLND, -#endif - .lnd_startup = kibnal_startup, - .lnd_shutdown = kibnal_shutdown, - .lnd_ctl = kibnal_ctl, - .lnd_send = kibnal_send, - .lnd_recv = kibnal_recv, - .lnd_eager_recv = kibnal_eager_recv, - .lnd_accept = kibnal_accept, -}; - -kib_data_t kibnal_data; - -__u32 -kibnal_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 1 : sum; -} - -void -kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kibnal_pack_msg(kib_msg_t *msg, int version, int credits, - lnet_nid_t dstnid, __u64 dststamp) -{ - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBNAL_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid, - dstnid); - msg->ibm_srcstamp = kibnal_data.kib_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - - if (*kibnal_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob); - } -} - -int -kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - int msg_version; - int flip; - int msg_nob; - - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - if (msg->ibm_magic == IBNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - msg_version = flip ? 
__swab16(msg->ibm_version) : msg->ibm_version; - if ((expected_version == 0) ? - (msg_version != IBNAL_MSG_VERSION && - msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) : - (msg_version != expected_version)) { - CERROR("Bad version: %x\n", msg_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kibnal_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = msg_version; - LASSERT (sizeof(msg->ibm_type) == 1); - LASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBNAL_MSG_SVCQRY: - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_SVCRSP: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.svcrsp)) { - CERROR("Short SVCRSP: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.svcrsp))); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->ibm_u.svcrsp.ibsr_svc_id); - __swab16s(&msg->ibm_u.svcrsp.ibsr_svc_pkey); - } - break; - - case IBNAL_MSG_CONNREQ: - case IBNAL_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short CONNREQ: %d(%d)\n", msg_nob, - 
(int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth); - break; - - case IBNAL_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_RDMA: - case IBNAL_MSG_GET_RDMA: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.rdma)) { - CERROR("Short RDMA req: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.rdma))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key); - __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob); - __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr); - } - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - } - return 0; -} - -int -kibnal_make_svcqry (kib_conn_t *conn) -{ - kib_peer_t *peer = conn->ibc_peer; - int version = IBNAL_MSG_VERSION; - int msg_version; - kib_msg_t *msg; - struct socket *sock; - int rc; - int nob; - - LASSERT (conn->ibc_connreq != NULL); - msg = &conn->ibc_connreq->cr_msg; - - again: - kibnal_init_msg(msg, IBNAL_MSG_SVCQRY, 0); - kibnal_pack_msg(msg, version, 0, peer->ibp_nid, 0); - - rc = lnet_connect(&sock, peer->ibp_nid, - 0, peer->ibp_ip, peer->ibp_port); - if (rc != 0) - return -ECONNABORTED; - - rc = libcfs_sock_write(sock, msg, msg->ibm_nob, - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d sending svcqry to %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - /* The first 6 bytes are invariably MAGIC + proto version */ - rc = libcfs_sock_read(sock, msg, 6, 
*kibnal_tunables.kib_timeout); - if (rc != 0) { - CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - if (msg->ibm_magic != IBNAL_MSG_MAGIC && - msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) { - CERROR("Bad magic: %08x from %s at %u.%u.%u.%u/%d\n", - msg->ibm_magic, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - msg_version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ? - msg->ibm_version : __swab16(msg->ibm_version); - if (msg_version != version) { - if (version == IBNAL_MSG_VERSION) { - /* retry with previous version */ - libcfs_sock_release(sock); - version = IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD; - goto again; - } - - CERROR("Bad version %x from %s at %u.%u.%u.%u/%d\n", - msg_version, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - /* Read in the rest of the message now we know the expected format */ - nob = offsetof(kib_msg_t, ibm_u) + sizeof(kib_svcrsp_t); - rc = libcfs_sock_read(sock, ((char *)msg) + 6, nob - 6, - *kibnal_tunables.kib_timeout); - if (rc != 0) { - CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - rc = kibnal_unpack_msg(msg, version, nob); - if (rc != 0) { - CERROR("Error %d unpacking svcrsp from %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - if (msg->ibm_type != IBNAL_MSG_SVCRSP) { - CERROR("Unexpected response type %d from %s at %u.%u.%u.%u/%d\n", - msg->ibm_type, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR("Unexpected dst NID/stamp 
%s/"LPX64" from " - "%s at %u.%u.%u.%u/%d\n", - libcfs_nid2str(msg->ibm_dstnid), msg->ibm_dststamp, - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - peer->ibp_port); - rc = -EPROTO; - goto out; - } - - if (!lnet_ptlcompat_matchnid(peer->ibp_nid, msg->ibm_srcnid)) { - CERROR("Unexpected src NID %s from %s at %u.%u.%u.%u/%d\n", - libcfs_nid2str(msg->ibm_srcnid), - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_connreq->cr_svcrsp = msg->ibm_u.svcrsp; - conn->ibc_version = version; - - out: - libcfs_sock_release(sock); - return rc; -} - -void -kibnal_handle_svcqry (struct socket *sock) -{ - __u32 peer_ip; - unsigned int peer_port; - kib_msg_t *msg; - __u64 srcnid; - __u64 srcstamp; - int version; - int reject = 0; - int rc; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - if (rc != 0) { - CERROR("Can't get peer's IP: %d\n", rc); - return; - } - - LIBCFS_ALLOC(msg, sizeof(*msg)); - if (msg == NULL) { - CERROR("Can't allocate msgs for %u.%u.%u.%u/%d\n", - HIPQUAD(peer_ip), peer_port); - return; - } - - rc = libcfs_sock_read(sock, &msg->ibm_magic, sizeof(msg->ibm_magic), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(1) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (msg->ibm_magic != IBNAL_MSG_MAGIC && - msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) { - /* Unexpected magic! */ - if (the_lnet.ln_ptlcompat == 0) { - if (msg->ibm_magic == LNET_PROTO_MAGIC || - msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) { - /* future protocol version compatibility! - * When LNET unifies protocols over all LNDs, - * the first thing sent will be a version - * query. 
I send back a reply in my current - * protocol to tell her I'm "old" */ - kibnal_init_msg(msg, 0, 0); - kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, - LNET_NID_ANY, 0); - reject = 1; - goto reply; - } - - CERROR ("Bad magic(1) %#08x (%#08x expected) from " - "%u.%u.%u.%u/%d\n", msg->ibm_magic, - IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port); - goto out; - } - - /* When portals compatibility is set, I may be passed a new - * connection "blindly" by the acceptor, and I have to - * determine if my peer has sent an acceptor connection request - * or not. */ - rc = lnet_accept(kibnal_data.kib_ni, sock, msg->ibm_magic); - if (rc != 0) - goto out; - - /* It was an acceptor connection request! - * Now I should see my magic... */ - rc = libcfs_sock_read(sock, &msg->ibm_magic, - sizeof(msg->ibm_magic), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(2) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (msg->ibm_magic != IBNAL_MSG_MAGIC && - msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) { - CERROR ("Bad magic(2) %#08x (%#08x expected) from " - "%u.%u.%u.%u/%d\n", msg->ibm_magic, - IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port); - goto out; - } - } - - /* Now check version */ - - rc = libcfs_sock_read(sock, &msg->ibm_version, sizeof(msg->ibm_version), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(3) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ? 
- msg->ibm_version : __swab16(msg->ibm_version); - /* Peer is a different protocol version: reply in my current protocol - * to tell her I'm "old" */ - if (version != IBNAL_MSG_VERSION && - version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - kibnal_init_msg(msg, 0, 0); - kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, LNET_NID_ANY, 0); - reject = 1; - goto reply; - } - - /* Now read in all the rest */ - rc = libcfs_sock_read(sock, &msg->ibm_type, - offsetof(kib_msg_t, ibm_u) - - offsetof(kib_msg_t, ibm_type), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(4) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - rc = kibnal_unpack_msg(msg, version, offsetof(kib_msg_t, ibm_u)); - if (rc != 0) { - CERROR("Error %d unpacking svcqry from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (msg->ibm_type != IBNAL_MSG_SVCQRY) { - CERROR("Unexpected message %d from %u.%u.%u.%u/%d\n", - msg->ibm_type, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid)) { - CERROR("Unexpected dstnid %s: expected %s from %u.%u.%u.%u/%d\n", - libcfs_nid2str(msg->ibm_dstnid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid), - HIPQUAD(peer_ip), peer_port); - goto out; - } - - srcnid = msg->ibm_srcnid; - srcstamp = msg->ibm_srcstamp; - - kibnal_init_msg(msg, IBNAL_MSG_SVCRSP, sizeof(msg->ibm_u.svcrsp)); - - msg->ibm_u.svcrsp.ibsr_svc_id = kibnal_data.kib_svc_id; - memcpy(msg->ibm_u.svcrsp.ibsr_svc_gid, kibnal_data.kib_svc_gid, - sizeof(kibnal_data.kib_svc_gid)); - msg->ibm_u.svcrsp.ibsr_svc_pkey = kibnal_data.kib_svc_pkey; - - kibnal_pack_msg(msg, version, 0, srcnid, srcstamp); - - reply: - rc = libcfs_sock_write (sock, msg, msg->ibm_nob, - lnet_acceptor_timeout()); - if (!reject && rc != 0) { - /* Only complain if we're not rejecting */ - CERROR("Error %d replying to svcqry from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; 
- } - - out: - LIBCFS_FREE(msg, sizeof(*msg)); -} - -void -kibnal_free_acceptsock (kib_acceptsock_t *as) -{ - libcfs_sock_release(as->ibas_sock); - LIBCFS_FREE(as, sizeof(*as)); -} - -int -kibnal_accept(lnet_ni_t *ni, struct socket *sock) -{ - kib_acceptsock_t *as; - unsigned long flags; - - LIBCFS_ALLOC(as, sizeof(*as)); - if (as == NULL) { - CERROR("Out of Memory\n"); - return -ENOMEM; - } - - as->ibas_sock = sock; - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail(&as->ibas_list, &kibnal_data.kib_connd_acceptq); - wake_up(&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - return 0; -} - -int -kibnal_start_ib_listener (void) -{ - int rc; - - LASSERT (kibnal_data.kib_listen_handle == NULL); - - kibnal_data.kib_svc_id = ib_cm_service_assign(); - CDEBUG(D_NET, "svc id "LPX64"\n", kibnal_data.kib_svc_id); - - rc = ib_cached_gid_get(kibnal_data.kib_device, - kibnal_data.kib_port, 0, - kibnal_data.kib_svc_gid); - if (rc != 0) { - CERROR("Can't get port %d GID: %d\n", - kibnal_data.kib_port, rc); - return rc; - } - - rc = ib_cached_pkey_get(kibnal_data.kib_device, - kibnal_data.kib_port, 0, - &kibnal_data.kib_svc_pkey); - if (rc != 0) { - CERROR ("Can't get port %d PKEY: %d\n", - kibnal_data.kib_port, rc); - return rc; - } - - rc = ib_cm_listen(kibnal_data.kib_svc_id, - TS_IB_CM_SERVICE_EXACT_MASK, - kibnal_passive_conn_callback, NULL, - &kibnal_data.kib_listen_handle); - if (rc != 0) { - kibnal_data.kib_listen_handle = NULL; - CERROR ("Can't create IB listener: %d\n", rc); - return rc; - } - - LASSERT (kibnal_data.kib_listen_handle != NULL); - return 0; -} - -void -kibnal_stop_ib_listener (void) -{ - int rc; - - LASSERT (kibnal_data.kib_listen_handle != NULL); - - rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle); - if (rc != 0) - CERROR("Error stopping IB listener: %d\n", rc); - - kibnal_data.kib_listen_handle = NULL; -} - -int -kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) -{ - 
kib_peer_t *peer; - unsigned long flags; - int rc; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, sizeof (*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_nid = nid; - atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD (&peer->ibp_conns); - INIT_LIST_HEAD (&peer->ibp_tx_queue); - INIT_LIST_HEAD (&peer->ibp_connd_list); /* not queued for connecting */ - - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - peer->ibp_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (atomic_read(&kibnal_data.kib_npeers) >= - *kibnal_tunables.kib_concurrent_peers) { - rc = -EOVERFLOW; /* !! but at least it distinguishes */ - } else if (kibnal_data.kib_nonewpeers) { - rc = -ESHUTDOWN; /* shutdown has started */ - } else { - rc = 0; - /* npeers only grows with kib_global_lock held */ - atomic_inc(&kibnal_data.kib_npeers); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Can't create peer: %s\n", - (rc == -ESHUTDOWN) ? 
"shutting down" : - "too many peers"); - LIBCFS_FREE(peer, sizeof(*peer)); - } else { - *peerp = peer; - } - - return rc; -} - -void -kibnal_destroy_peer (kib_peer_t *peer) -{ - CDEBUG (D_NET, "peer %s %p deleted\n", - libcfs_nid2str(peer->ibp_nid), peer); - - LASSERT (atomic_read (&peer->ibp_refcount) == 0); - LASSERT (peer->ibp_persistence == 0); - LASSERT (!kibnal_peer_active(peer)); - LASSERT (peer->ibp_connecting == 0); - LASSERT (peer->ibp_accepting == 0); - LASSERT (list_empty (&peer->ibp_connd_list)); - LASSERT (list_empty (&peer->ibp_conns)); - LASSERT (list_empty (&peer->ibp_tx_queue)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. */ - atomic_dec(&kibnal_data.kib_npeers); -} - -kib_peer_t * -kibnal_find_peer_locked (lnet_nid_t nid) -{ - struct list_head *peer_list = kibnal_nid2peerlist (nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_persistence != 0 || /* persistent peer */ - peer->ibp_connecting != 0 || /* creating conns */ - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); /* active conn */ - - if (peer->ibp_nid != nid) - continue; - - return (peer); - } - return (NULL); -} - -kib_peer_t * -kibnal_get_peer (lnet_nid_t nid) -{ - kib_peer_t *peer; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) /* +1 ref for caller? 
*/ - kibnal_peer_addref(peer); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - return (peer); -} - -void -kibnal_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (peer->ibp_persistence == 0); - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kibnal_peer_active(peer)); - list_del_init (&peer->ibp_list); - /* lose peerlist's ref */ - kibnal_peer_decref(peer); -} - -int -kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp, - int *persistencep) -{ - kib_peer_t *peer; - struct list_head *ptmp; - unsigned long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *ipp = peer->ibp_ip; - *portp = peer->ibp_port; - *persistencep = peer->ibp_persistence; - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (0); - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (-ENOENT); -} - -int -kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port) -{ - unsigned long flags; - kib_peer_t *peer; - kib_peer_t *peer2; - int rc; - - if (nid == LNET_NID_ANY) - return (-EINVAL); - - rc = kibnal_create_peer (&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - /* I'm always called with a reference on kibnal_data.kib_ni - * so shutdown can't have started */ - LASSERT (kibnal_data.kib_nonewpeers == 0); - - peer2 = kibnal_find_peer_locked (nid); - if (peer2 != NULL) { - kibnal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist (nid)); - } - - peer->ibp_ip = ip; 
- peer->ibp_port = port; - peer->ibp_persistence++; - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return (0); -} - -void -kibnal_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - peer->ibp_persistence = 0; - - if (list_empty(&peer->ibp_conns)) { - kibnal_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kibnal_close_conn_locked (conn, 0); - } - /* NB peer is no longer persistent; closing its last conn - * unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. */ -} - -int -kibnal_del_peer (lnet_nid_t nid) -{ - unsigned long flags; - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kibnal_del_peer_locked (peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_txlist_done(&zombies, -EIO); - - return (rc); -} - -kib_conn_t * -kibnal_get_conn_by_idx (int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned 
long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence > 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, kib_conn_t, ibc_list); - kibnal_conn_addref(conn); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (conn); - } - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (NULL); -} - -kib_conn_t * -kibnal_create_conn (void) -{ - kib_conn_t *conn; - int i; - __u64 vaddr = 0; - __u64 vaddr_base; - int page_offset; - int ipage; - int rc; - union { - struct ib_qp_create_param qp_create; - struct ib_qp_attribute qp_attr; - } params; - - LIBCFS_ALLOC (conn, sizeof (*conn)); - if (conn == NULL) { - CERROR ("Can't allocate connection\n"); - return (NULL); - } - - /* zero flags, NULL pointers etc... 
*/ - memset (conn, 0, sizeof (*conn)); - - INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD (&conn->ibc_tx_queue); - INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD (&conn->ibc_active_txs); - spin_lock_init (&conn->ibc_lock); - - atomic_inc (&kibnal_data.kib_nconns); - /* well not really, but I call destroy() on failure, which decrements */ - - LIBCFS_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t)); - if (conn->ibc_rxs == NULL) - goto failed; - memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - rc = kibnal_alloc_pages(&conn->ibc_rx_pages, - IBNAL_RX_MSG_PAGES, - IB_ACCESS_LOCAL_WRITE); - if (rc != 0) - goto failed; - - vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr; - - for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_vaddr = vaddr; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset); - - vaddr += IBNAL_MSG_SIZE; - LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_RX_MSG_PAGES); - } - } - - /* We can post up to IBNAL_RX_MSGS, which may also include an - * additional RDMA work item */ - - params.qp_create = (struct ib_qp_create_param) { - .limit = { - .max_outstanding_send_request = 2 * IBNAL_RX_MSGS, - .max_outstanding_receive_request = IBNAL_RX_MSGS, - .max_send_gather_element = 1, - .max_receive_scatter_element = 1, - }, - .pd = kibnal_data.kib_pd, - .send_queue = kibnal_data.kib_cq, - .receive_queue = kibnal_data.kib_cq, - .send_policy = IB_WQ_SIGNAL_SELECTABLE, - .receive_policy = IB_WQ_SIGNAL_SELECTABLE, - .rd_domain = 0, - .transport = IB_TRANSPORT_RC, - .device_specific = NULL, - }; - - rc = ib_qp_create (¶ms.qp_create, &conn->ibc_qp, &conn->ibc_qpn); - if (rc != 0) { - CERROR 
("Failed to create queue pair: %d\n", rc); - goto failed; - } - - /* Mark QP created */ - conn->ibc_state = IBNAL_CONN_INIT_QP; - - params.qp_attr = (struct ib_qp_attribute) { - .state = IB_QP_STATE_INIT, - .port = kibnal_data.kib_port, - .enable_rdma_read = 1, - .enable_rdma_write = 1, - .valid_fields = (IB_QP_ATTRIBUTE_STATE | - IB_QP_ATTRIBUTE_PORT | - IB_QP_ATTRIBUTE_PKEY_INDEX | - IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE), - }; - rc = ib_qp_modify(conn->ibc_qp, ¶ms.qp_attr); - if (rc != 0) { - CERROR ("Failed to modify queue pair: %d\n", rc); - goto failed; - } - - /* 1 ref for caller */ - atomic_set (&conn->ibc_refcount, 1); - return (conn); - - failed: - kibnal_destroy_conn (conn); - return (NULL); -} - -void -kibnal_destroy_conn (kib_conn_t *conn) -{ - int rc; - - CDEBUG (D_NET, "connection %p\n", conn); - - LASSERT (atomic_read (&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT (list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - LASSERT (conn->ibc_connreq == NULL); - - switch (conn->ibc_state) { - case IBNAL_CONN_ZOMBIE: - /* called after connection sequence initiated */ - - case IBNAL_CONN_INIT_QP: - rc = ib_qp_destroy(conn->ibc_qp); - if (rc != 0) - CERROR("Can't destroy QP: %d\n", rc); - /* fall through */ - - case IBNAL_CONN_INIT_NOTHING: - break; - - default: - LASSERT (0); - } - - if (conn->ibc_rx_pages != NULL) - kibnal_free_pages(conn->ibc_rx_pages); - - if (conn->ibc_rxs != NULL) - LIBCFS_FREE(conn->ibc_rxs, - IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - if (conn->ibc_peer != NULL) - kibnal_peer_decref(conn->ibc_peer); - - LIBCFS_FREE(conn, sizeof (*conn)); - - atomic_dec(&kibnal_data.kib_nconns); - - if (atomic_read (&kibnal_data.kib_nconns) == 0 && - kibnal_data.kib_shutdown) { - /* I just nuked the last connection on shutdown; wake up - * everyone so they can exit. 
*/ - wake_up_all(&kibnal_data.kib_sched_waitq); - wake_up_all(&kibnal_data.kib_reaper_waitq); - } -} - -int -kibnal_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - count++; - kibnal_close_conn_locked (conn, why); - } - - return (count); -} - -int -kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn %p nid: %s" - " incarnation:"LPX64"("LPX64")\n", conn, - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kibnal_close_conn_locked (conn, -ESTALE); - } - - return (count); -} - -int -kibnal_close_matching_conns (lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kibnal_close_peer_conns_locked (peer, 0); - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return 
(0); - - return (count == 0 ? -ENOENT : 0); -} - -int -kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - LASSERT (ni == kibnal_data.kib_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - __u32 ip = 0; - int port = 0; - int share_count = 0; - - rc = kibnal_get_peer_info(data->ioc_count, - &nid, &ip, &port, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kibnal_add_persistent_peer (data->ioc_nid, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kibnal_del_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kibnal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kibnal_close_matching_conns (data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - return rc; -} - -void -kibnal_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int rc; - int i; - - if (p->ibp_mapped) { - rc = ib_memory_deregister(p->ibp_handle); - if (rc != 0) - CERROR ("Deregister error: %d\n", rc); - } - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kibnal_alloc_pages (kib_pages_t **pp, int npages, int access) -{ - kib_pages_t *p; - struct ib_physical_buffer *phys_pages; - int i; - int rc; - - LIBCFS_ALLOC(p, 
offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { - CERROR ("Can't allocate buffer %d\n", npages); - return (-ENOMEM); - } - - memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_page (GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR ("Can't allocate page %d of %d\n", i, npages); - kibnal_free_pages(p); - return (-ENOMEM); - } - } - - LIBCFS_ALLOC(phys_pages, npages * sizeof(*phys_pages)); - if (phys_pages == NULL) { - CERROR ("Can't allocate physarray for %d pages\n", npages); - kibnal_free_pages(p); - return (-ENOMEM); - } - - for (i = 0; i < npages; i++) { - phys_pages[i].size = PAGE_SIZE; - phys_pages[i].address = - lnet_page2phys(p->ibp_pages[i]); - } - - p->ibp_vaddr = 0; - rc = ib_memory_register_physical(kibnal_data.kib_pd, - phys_pages, npages, - &p->ibp_vaddr, - npages * PAGE_SIZE, 0, - access, - &p->ibp_handle, - &p->ibp_lkey, - &p->ibp_rkey); - - LIBCFS_FREE(phys_pages, npages * sizeof(*phys_pages)); - - if (rc != 0) { - CERROR ("Error %d mapping %d pages\n", rc, npages); - kibnal_free_pages(p); - return (rc); - } - - p->ibp_mapped = 1; - *pp = p; - return (0); -} - -int -kibnal_setup_tx_descs (void) -{ - int ipage = 0; - int page_offset = 0; - __u64 vaddr; - __u64 vaddr_base; - struct page *page; - kib_tx_t *tx; - int i; - int rc; - - /* pre-mapped messages are not bigger than 1 page */ - LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - - rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, - IBNAL_TX_MSG_PAGES(), - 0); /* local read access only */ - if (rc != 0) - return (rc); - - vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr; - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - page = kibnal_data.kib_tx_pages->ibp_pages[ipage]; - tx = &kibnal_data.kib_tx_descs[i]; - - memset (tx, 0, sizeof(*tx)); /* zero flags etc */ - - tx->tx_msg = 
(kib_msg_t *)(((char *)page_address(page)) + page_offset); - tx->tx_vaddr = vaddr; - tx->tx_mapped = KIB_TX_UNMAPPED; - - CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", - i, tx, tx->tx_msg, tx->tx_vaddr); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - vaddr += IBNAL_MSG_SIZE; - LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES()); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_TX_MSG_PAGES()); - } - } - - return (0); -} - -void -kibnal_shutdown (lnet_ni_t *ni) -{ - int i; - int rc; - unsigned long flags; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - LASSERT(ni == kibnal_data.kib_ni); - LASSERT(ni->ni_data == &kibnal_data); - - switch (kibnal_data.kib_init) { - default: - CERROR ("Unexpected state %d\n", kibnal_data.kib_init); - LBUG(); - - case IBNAL_INIT_ALL: - /* Prevent new peers from being created */ - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - kibnal_data.kib_nonewpeers = 1; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_stop_ib_listener(); - - /* Remove all existing peers from the peer table */ - kibnal_del_peer(LNET_NID_ANY); - - /* Wait for pending conn reqs to be handled */ - i = 2; - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - while (!list_empty(&kibnal_data.kib_connd_acceptq)) { - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, - flags); - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */ - "waiting for conn reqs to clean up\n"); - cfs_pause(cfs_time_seconds(1)); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - } - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&kibnal_data.kib_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d peers to close down\n", - atomic_read(&kibnal_data.kib_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_CQ: - rc = ib_cq_destroy (kibnal_data.kib_cq); - if (rc != 0) - CERROR ("Destroy CQ error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_TXD: - kibnal_free_pages (kibnal_data.kib_tx_pages); - /* fall through */ -#if IBNAL_FMR - case IBNAL_INIT_FMR: - rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool); - if (rc != 0) - CERROR ("Destroy FMR pool error: %d\n", rc); - /* fall through */ -#endif - case IBNAL_INIT_PD: - rc = ib_pd_destroy(kibnal_data.kib_pd); - if (rc != 0) - CERROR ("Destroy PD error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_DATA: - /* Module refcount only gets to zero when all peers - * have been closed so all lists must be empty */ - LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0); - LASSERT (kibnal_data.kib_peers != NULL); - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - LASSERT (list_empty (&kibnal_data.kib_peers[i])); - } - LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0); - LASSERT (list_empty (&kibnal_data.kib_sched_rxq)); - LASSERT (list_empty (&kibnal_data.kib_sched_txq)); - LASSERT (list_empty (&kibnal_data.kib_reaper_conns)); - LASSERT (list_empty (&kibnal_data.kib_connd_peers)); - LASSERT (list_empty (&kibnal_data.kib_connd_acceptq)); - - /* flag threads to terminate; wake and wait for them to die */ - kibnal_data.kib_shutdown = 1; - wake_up_all (&kibnal_data.kib_sched_waitq); - wake_up_all (&kibnal_data.kib_reaper_waitq); - wake_up_all (&kibnal_data.kib_connd_waitq); - - i = 2; - while (atomic_read (&kibnal_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d threads to terminate\n", - atomic_read (&kibnal_data.kib_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_NOTHING: - break; - } - - if (kibnal_data.kib_tx_descs != NULL) - LIBCFS_FREE (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - - if (kibnal_data.kib_peers != NULL) - LIBCFS_FREE (kibnal_data.kib_peers, - sizeof (struct list_head) * - kibnal_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - kibnal_data.kib_init = IBNAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kibnal_get_ipoibidx(void) -{ - /* NB single threaded! */ - static struct ib_port_properties port_props; - - int ipoibidx = 0; - int devidx; - int port; - int rc; - struct ib_device *device; - - for (devidx = 0; devidx <= kibnal_data.kib_hca_idx; devidx++) { - device = ib_device_get_by_index(devidx); - - if (device == NULL) { - CERROR("Can't get IB device %d\n", devidx); - return -1; - } - - for (port = 1; port <= 2; port++) { - if (devidx == kibnal_data.kib_hca_idx && - port == kibnal_data.kib_port) - return ipoibidx; - - rc = ib_port_properties_get(device, port, - &port_props); - if (rc == 0) - ipoibidx++; - } - } - - LBUG(); - return -1; -} - -int -kibnal_startup (lnet_ni_t *ni) -{ - char ipif_name[32]; - __u32 ip; - __u32 netmask; - int up; - struct timeval tv; - int rc; - int hca; - int port; - int i; - int nob; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - /* Only 1 instance supported */ - if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kibnal_tunables.kib_credits, - *kibnal_tunables.kib_ntx); - return -EINVAL; - } - - memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */ - - ni->ni_maxtxcredits = *kibnal_tunables.kib_credits; - 
ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits; - - CLASSERT (LNET_MAX_INTERFACES > 1); - - - kibnal_data.kib_hca_idx = 0; /* default: first HCA */ - kibnal_data.kib_port = 0; /* any port */ - - if (ni->ni_interfaces[0] != NULL) { - /* hca.port specified in 'networks=openib(h.p)' */ - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - return -EPERM; - } - - nob = strlen(ni->ni_interfaces[0]); - i = sscanf(ni->ni_interfaces[0], "%d.%d%n", &hca, &port, &nob); - if (i >= 2 && nob == strlen(ni->ni_interfaces[0])) { - kibnal_data.kib_hca_idx = hca; - kibnal_data.kib_port = port; - } else { - nob = strlen(ni->ni_interfaces[0]); - i = sscanf(ni->ni_interfaces[0], "%d%n", &hca, &nob); - - if (i >= 1 && nob == strlen(ni->ni_interfaces[0])) { - kibnal_data.kib_hca_idx = hca; - } else { - CERROR("Can't parse interface '%s'\n", - ni->ni_interfaces[0]); - return -EINVAL; - } - } - } - - kibnal_data.kib_ni = ni; - ni->ni_data = &kibnal_data; - - do_gettimeofday(&tv); - kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - PORTAL_MODULE_USE; - - rwlock_init(&kibnal_data.kib_global_lock); - - kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (kibnal_data.kib_peers, - sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); - if (kibnal_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kibnal_data.kib_peers[i]); - - spin_lock_init (&kibnal_data.kib_reaper_lock); - INIT_LIST_HEAD (&kibnal_data.kib_reaper_conns); - init_waitqueue_head (&kibnal_data.kib_reaper_waitq); - - spin_lock_init (&kibnal_data.kib_connd_lock); - INIT_LIST_HEAD (&kibnal_data.kib_connd_acceptq); - INIT_LIST_HEAD (&kibnal_data.kib_connd_peers); - init_waitqueue_head (&kibnal_data.kib_connd_waitq); - - spin_lock_init (&kibnal_data.kib_sched_lock); - INIT_LIST_HEAD (&kibnal_data.kib_sched_txq); - INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq); - 
init_waitqueue_head (&kibnal_data.kib_sched_waitq); - - spin_lock_init (&kibnal_data.kib_tx_lock); - INIT_LIST_HEAD (&kibnal_data.kib_idle_txs); - - LIBCFS_ALLOC (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - if (kibnal_data.kib_tx_descs == NULL) { - CERROR ("Can't allocate tx descs\n"); - goto failed; - } - - /* lists/ptrs/locks initialised */ - kibnal_data.kib_init = IBNAL_INIT_DATA; - /*****************************************************/ - - for (i = 0; i < IBNAL_N_SCHED; i++) { - rc = kibnal_thread_start (kibnal_scheduler, - (void *)((unsigned long)i)); - if (rc != 0) { - CERROR("Can't spawn openibnal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - /* must have at least 2 connds to remain responsive to svcqry while - * connecting */ - if (*kibnal_tunables.kib_n_connd < 2) - *kibnal_tunables.kib_n_connd = 2; - - - for (i = 0; i < *kibnal_tunables.kib_n_connd; i++) { - rc = kibnal_thread_start (kibnal_connd, - (void *)((unsigned long)i)); - if (rc != 0) { - CERROR("Can't spawn openibnal connd[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kibnal_thread_start (kibnal_reaper, NULL); - if (rc != 0) { - CERROR ("Can't spawn openibnal reaper: %d\n", rc); - goto failed; - } - - kibnal_data.kib_device = ib_device_get_by_index(kibnal_data.kib_hca_idx); - if (kibnal_data.kib_device == NULL) { - CERROR ("Can't open ib device %d\n", - kibnal_data.kib_hca_idx); - goto failed; - } - - rc = ib_device_properties_get(kibnal_data.kib_device, - &kibnal_data.kib_device_props); - if (rc != 0) { - CERROR ("Can't get device props: %d\n", rc); - goto failed; - } - - CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n", - kibnal_data.kib_device_props.max_initiator_per_qp, - kibnal_data.kib_device_props.max_responder_per_qp); - - if (kibnal_data.kib_port != 0) { - rc = ib_port_properties_get(kibnal_data.kib_device, - kibnal_data.kib_port, - &kibnal_data.kib_port_props); - if (rc != 0) { - CERROR("Error %d open port %d on HCA %d\n", rc, - 
kibnal_data.kib_port, - kibnal_data.kib_hca_idx); - goto failed; - } - } else { - for (i = 1; i <= 2; i++) { - rc = ib_port_properties_get(kibnal_data.kib_device, i, - &kibnal_data.kib_port_props); - if (rc == 0) { - kibnal_data.kib_port = i; - break; - } - } - if (kibnal_data.kib_port == 0) { - CERROR ("Can't find a port\n"); - goto failed; - } - } - - i = kibnal_get_ipoibidx(); - if (i < 0) - goto failed; - - snprintf(ipif_name, sizeof(ipif_name), "%s%d", - *kibnal_tunables.kib_ipif_basename, i); - if (strlen(ipif_name) == sizeof(ipif_name) - 1) { - CERROR("IPoIB interface name %s truncated\n", ipif_name); - return -EINVAL; - } - - rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc); - goto failed; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name); - goto failed; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip); - - rc = ib_pd_create(kibnal_data.kib_device, - NULL, &kibnal_data.kib_pd); - if (rc != 0) { - CERROR ("Can't create PD: %d\n", rc); - goto failed; - } - - /* flag PD initialised */ - kibnal_data.kib_init = IBNAL_INIT_PD; - /*****************************************************/ -#if IBNAL_FMR - { - const int pool_size = *kibnal_tunables.kib_ntx; - struct ib_fmr_pool_param params = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE, - .access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ), - .pool_size = pool_size, - .dirty_watermark = (pool_size * 3)/4, - .flush_function = NULL, - .flush_arg = NULL, - .cache = 1, - }; - rc = ib_fmr_pool_create(kibnal_data.kib_pd, ¶ms, - &kibnal_data.kib_fmr_pool); - if (rc != 0) { - CERROR ("Can't create FMR pool size %d: %d\n", - pool_size, rc); - goto failed; - } - } - - /* flag FMR pool initialised */ - kibnal_data.kib_init = IBNAL_INIT_FMR; -#endif - /*****************************************************/ - - rc = kibnal_setup_tx_descs(); - if (rc != 0) { 
- CERROR ("Can't register tx descs: %d\n", rc); - goto failed; - } - - /* flag TX descs initialised */ - kibnal_data.kib_init = IBNAL_INIT_TXD; - /*****************************************************/ - - { - struct ib_cq_callback callback = { - .context = IBNAL_CALLBACK_CTXT, - .policy = IB_CQ_PROVIDER_REARM, - .function = { - .entry = kibnal_callback, - }, - .arg = NULL, - }; - int nentries = IBNAL_CQ_ENTRIES(); - - rc = ib_cq_create (kibnal_data.kib_device, - &nentries, &callback, NULL, - &kibnal_data.kib_cq); - if (rc != 0) { - CERROR ("Can't create CQ: %d\n", rc); - goto failed; - } - - /* I only want solicited events */ - rc = ib_cq_request_notification(kibnal_data.kib_cq, 1); - LASSERT (rc == 0); - } - - /* flag CQ initialised */ - kibnal_data.kib_init = IBNAL_INIT_CQ; - /*****************************************************/ - - rc = kibnal_start_ib_listener(); - if (rc != 0) - goto failed; - - /* flag everything initialised */ - kibnal_data.kib_init = IBNAL_INIT_ALL; - /*****************************************************/ - - return 0; - - failed: - kibnal_shutdown(ni); - return -ENETDOWN; -} - -void __exit -kibnal_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kibnal_tunables_fini(); -} - -int __init -kibnal_module_init (void) -{ - int rc; - - rc = kibnal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -#ifdef USING_TSAPI -MODULE_DESCRIPTION("Kernel Cisco IB LND v1.00"); -#else -MODULE_DESCRIPTION("Kernel OpenIB(gen1) LND v1.00"); -#endif -MODULE_LICENSE("GPL"); - -module_init(kibnal_module_init); -module_exit(kibnal_module_fini); - diff --git a/lnet/klnds/openiblnd/openiblnd.h b/lnet/klnds/openiblnd/openiblnd.h deleted file mode 100644 index 4354dd2d16273eb28787e3843b235c36614c80ac..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd.h +++ /dev/null @@ -1,688 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> - -#include <net/sock.h> -#include <linux/in.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -#include <ts_ib_core.h> -#include <ts_ib_cm.h> -#include <ts_ib_sa_client.h> - -#ifndef USING_TSAPI - -/* OpenIB Gen1 */ -typedef struct ib_qp ib_qp_t; -typedef struct ib_mr ib_mr_t; -typedef struct ib_fmr ib_fmr_t; -typedef struct ib_pd ib_pd_t; -typedef struct ib_cq ib_cq_t; -typedef struct ib_fmr_pool ib_fmr_pool_t; - -#else - -/* Cisco (topspin) */ -typedef void ib_qp_t; -typedef void ib_mr_t; -typedef void ib_fmr_t; -typedef void ib_pd_t; -typedef void ib_cq_t; -typedef void ib_fmr_pool_t; - -#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE -#define IB_WQ_SIGNAL_SELECTABLE TS_IB_ACCESS_LOCAL_WRITE -#define IB_TRANSPORT_RC TS_IB_TRANSPORT_RC -#define IB_QP_STATE_INIT TS_IB_QP_STATE_INIT -#define IB_QP_ATTRIBUTE_STATE TS_IB_QP_ATTRIBUTE_STATE -#define IB_QP_ATTRIBUTE_PORT TS_IB_QP_ATTRIBUTE_PORT -#define IB_QP_ATTRIBUTE_PKEY_INDEX TS_IB_QP_ATTRIBUTE_PKEY_INDEX -#define IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE -#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE -#define IB_ACCESS_REMOTE_WRITE TS_IB_ACCESS_REMOTE_WRITE -#define IB_ACCESS_REMOTE_READ TS_IB_ACCESS_REMOTE_READ -#define IB_CQ_CALLBACK_INTERRU TS_IB_CQ_CALLBACK_INTERRUPTPT -#define 
IB_CQ_PROVIDER_REARM TS_IB_CQ_PROVIDER_REARM -#define IB_CQ_CALLBACK_INTERRUPT TS_IB_CQ_CALLBACK_INTERRUPT -#define IB_COMPLETION_STATUS_SUCCESS TS_IB_COMPLETION_STATUS_SUCCESS -#define IB_OP_SEND TS_IB_OP_SEND -#define IB_OP_RDMA_WRITE TS_IB_OP_RDMA_WRITE -#define IB_OP_RDMA_READ TS_IB_OP_RDMA_READ - -#endif - -#ifdef CONFIG_SMP -# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define IBNAL_N_SCHED 1 /* # schedulers */ -#endif - -#define IBNAL_FMR 1 -//#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS -#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT - - -/* tunables fixed at compile time */ -#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ -#define IBNAL_CREDIT_HIGHWATER 6 /* when to eagerly return credits */ -#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ -#define IBNAL_RDMA_BASE 0x0eeb0000 - -/* QP tunables */ -#define IBNAL_RETRY 7 /* # times to retry */ -#define IBNAL_RNR_RETRY 7 /* */ -#define IBNAL_CM_RETRY 7 /* # times to retry connection */ -#define IBNAL_FLOW_CONTROL 1 -#define IBNAL_RESPONDER_RESOURCES 8 - -/************************/ -/* derived constants... 
*/ - -/* TX messages (shared by all connections) */ -#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx) -#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE) -#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE) - -/* RX messages (per connection) */ -#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2) -#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE) -#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -/* we may have up to 2 completions per transmit + - 1 completion per receive, per connection */ -#define IBNAL_CQ_ENTRIES() ((2*IBNAL_TX_MSGS()) + \ - (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers)) - -typedef struct -{ - char **kib_ipif_basename; /* IPoIB interface base name */ - int *kib_n_connd; /* # connection daemons */ - int *kib_min_reconnect_interval; /* min connect retry seconds... */ - int *kib_max_reconnect_interval; /* max connect retry seconds */ - int *kib_concurrent_peers; /* max # peers */ - int *kib_cksum; /* checksum kib_msg_t? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_keepalive; /* keepalive (seconds) */ - int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ - - cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */ -} kib_tunables_t; - -typedef struct -{ - int ibp_npages; /* # pages */ - int ibp_mapped; /* mapped? */ - __u64 ibp_vaddr; /* mapped region vaddr */ - __u32 ibp_lkey; /* mapped region lkey */ - __u32 ibp_rkey; /* mapped region rkey */ - ib_mr_t *ibp_handle; /* mapped region handle */ - struct page *ibp_pages[0]; -} kib_pages_t; - -typedef struct -{ - int kib_init; /* initialisation state */ - __u64 kib_incarnation; /* which one am I */ - int kib_shutdown; /* shut down? 
*/ - atomic_t kib_nthreads; /* # live threads */ - lnet_ni_t *kib_ni; /* _the_ openib interface */ - - __u64 kib_svc_id; /* service number I listen on */ - tTS_IB_GID kib_svc_gid; /* device/port GID */ - __u16 kib_svc_pkey; /* device/port pkey */ - - void *kib_listen_handle; /* IB listen handle */ - - rwlock_t kib_global_lock; /* stabilize peer/conn ops */ - - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - int kib_nonewpeers; /* prevent new peers? */ - atomic_t kib_npeers; /* # peers extant */ - atomic_t kib_nconns; /* # connections extant */ - - struct list_head kib_reaper_conns; /* connections to reap */ - wait_queue_head_t kib_reaper_waitq; /* reaper sleeps here */ - unsigned long kib_reaper_waketime; /* when reaper will wake */ - spinlock_t kib_reaper_lock; /* serialise */ - - struct list_head kib_connd_peers; /* peers waiting for a connection */ - struct list_head kib_connd_acceptq; /* accepted sockets to handle */ - wait_queue_head_t kib_connd_waitq; /* connection daemons sleep here */ - int kib_connd_connecting; /* # connds connecting */ - spinlock_t kib_connd_lock; /* serialise */ - - wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ - struct list_head kib_sched_txq; /* tx requiring attention */ - struct list_head kib_sched_rxq; /* rx requiring attention */ - spinlock_t kib_sched_lock; /* serialise */ - - struct kib_tx *kib_tx_descs; /* all the tx descriptors */ - kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ - - struct list_head kib_idle_txs; /* idle tx descriptors */ - __u64 kib_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kib_tx_lock; /* serialise */ - - int kib_hca_idx; /* my HCA number */ - struct ib_device *kib_device; /* "the" device */ - struct ib_device_properties kib_device_props; /* its properties */ - int kib_port; /* port on the device */ - struct ib_port_properties kib_port_props; /* its properties */ - ib_pd_t *kib_pd; /* protection 
domain */ -#if IBNAL_FMR - ib_fmr_pool_t *kib_fmr_pool; /* fast memory region pool */ -#endif - ib_cq_t *kib_cq; /* completion queue */ - -} kib_data_t; - -#define IBNAL_INIT_NOTHING 0 -#define IBNAL_INIT_DATA 1 -#define IBNAL_INIT_LIB 2 -#define IBNAL_INIT_PD 3 -#define IBNAL_INIT_FMR 4 -#define IBNAL_INIT_TXD 5 -#define IBNAL_INIT_CQ 6 -#define IBNAL_INIT_ALL 7 - -typedef struct kib_acceptsock /* accepted socket queued for connd */ -{ - struct list_head ibas_list; /* queue for attention */ - struct socket *ibas_sock; /* the accepted socket */ -} kib_acceptsock_t; - -/************************************************************************ - * IB Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - * They may be sent via TCP/IP (service ID,GID,PKEY query/response), - * as private data in the connection request/response, or "normally". - */ - -typedef struct kib_svcrsp /* service response */ -{ - __u64 ibsr_svc_id; /* service's id */ - __u8 ibsr_svc_gid[16]; /* service's gid */ - __u16 ibsr_svc_pkey; /* service's pkey */ -} WIRE_ATTR kib_svcrsp_t; - -typedef struct kib_connparams -{ - __u32 ibcp_queue_depth; -} WIRE_ATTR kib_connparams_t; - -typedef struct -{ - union { - ib_mr_t *mr; - ib_fmr_t *fmr; - } md_handle; - __u32 md_lkey; - __u32 md_rkey; - __u64 md_addr; -} kib_md_t; - -typedef struct -{ - __u32 rd_key; /* remote key */ - __u32 rd_nob; /* # of bytes */ - __u64 rd_addr; /* remote io vaddr */ -} WIRE_ATTR kib_rdma_desc_t; - -typedef struct -{ - lnet_hdr_t ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kib_immediate_msg_t; - -typedef struct -{ - lnet_hdr_t ibrm_hdr; /* portals header */ - __u64 ibrm_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibrm_desc; /* where to suck/blow */ -} WIRE_ATTR kib_rdma_msg_t; - -typedef struct -{ - __u64 ibcm_cookie; /* opaque completion cookie */ - __u32 ibcm_status; /* completion status */ -} WIRE_ATTR kib_completion_msg_t; - 
-typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an openibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - union { - kib_svcrsp_t svcrsp; - kib_connparams_t connparams; - kib_immediate_msg_t immediate; - kib_rdma_msg_t rdma; - kib_completion_msg_t completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR kib_msg_t; - -#define IBNAL_MSG_MAGIC LNET_PROTO_OPENIB_MAGIC /* unique magic */ -#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 2 /* previous protocol version */ -#define IBNAL_MSG_VERSION 3 /* current protocol version */ - -#define IBNAL_MSG_SVCQRY 0xb0 /* service query */ -#define IBNAL_MSG_SVCRSP 0xb1 /* service response */ -#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */ -#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define IBNAL_MSG_IMMEDIATE 0xd1 /* portals hdr + payload */ -#define IBNAL_MSG_PUT_RDMA 0xd2 /* portals PUT hdr + source rdma desc */ -#define IBNAL_MSG_PUT_DONE 0xd3 /* signal PUT rdma completion */ -#define IBNAL_MSG_GET_RDMA 0xd4 /* portals GET hdr + sink rdma desc */ -#define IBNAL_MSG_GET_DONE 0xd5 /* signal GET rdma completion */ - -/***********************************************************************/ - -typedef struct kib_rx /* receive message */ -{ - struct list_head rx_list; /* queue for attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - __u64 rx_vaddr; /* pre-mapped buffer (hca vaddr) */ - kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ - struct ib_receive_param rx_sp; /* receive 
work item */ - struct ib_gather_scatter rx_gl; /* and it's memory */ -} kib_rx_t; - -typedef struct kib_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ - struct kib_conn *tx_conn; /* owning conn */ - int tx_mapped; /* mapped for RDMA? */ - int tx_sending; /* # tx callbacks outstanding */ - int tx_status; /* completion status */ - unsigned long tx_deadline; /* completion deadline */ - int tx_passive_rdma; /* peer sucks/blows */ - int tx_passive_rdma_wait; /* waiting for peer to complete */ - __u64 tx_passive_rdma_cookie; /* completion cookie */ - lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */ - kib_md_t tx_md; /* RDMA mapping (active/passive) */ - __u64 tx_vaddr; /* pre-mapped buffer (hca vaddr) */ - kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ - int tx_nsp; /* # send work items */ - struct ib_send_param tx_sp[2]; /* send work items... */ - struct ib_gather_scatter tx_gl[2]; /* ...and their memory */ -} kib_tx_t; - -#define KIB_TX_UNMAPPED 0 -#define KIB_TX_MAPPED 1 -#define KIB_TX_MAPPED_FMR 2 - -typedef struct kib_connreq -{ - /* active connection-in-progress state */ - struct kib_conn *cr_conn; - kib_msg_t cr_msg; - __u64 cr_tid; - tTS_IB_GID cr_gid; - kib_svcrsp_t cr_svcrsp; - struct ib_path_record cr_path; - struct ib_cm_active_param cr_connparam; -} kib_connreq_t; - -typedef struct kib_conn -{ - struct kib_peer *ibc_peer; /* owning peer */ - struct list_head ibc_list; /* stash on peer's conn list */ - __u64 ibc_incarnation; /* which instance of the peer */ - int ibc_version; /* peer protocol version */ - atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # credits for ACK/DONE msgs */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head 
ibc_tx_queue_nocred; /* sends that don't need a credit */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */ - struct list_head ibc_tx_queue; /* send queue */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - ib_qp_t *ibc_qp; /* queue pair */ - __u32 ibc_qpn; /* queue pair number */ - tTS_IB_CM_COMM_ID ibc_comm_id; /* connection ID? */ - kib_connreq_t *ibc_connreq; /* connection request state */ -} kib_conn_t; - -#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */ -#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */ -#define IBNAL_CONN_CONNECTING 2 /* started to connect */ -#define IBNAL_CONN_ESTABLISHED 3 /* connection established */ -#define IBNAL_CONN_DEATHROW 4 /* waiting to be closed */ -#define IBNAL_CONN_ZOMBIE 5 /* waiting to be freed */ - -typedef struct kib_peer -{ - struct list_head ibp_list; /* stash on global peer list */ - struct list_head ibp_connd_list; /* schedule on kib_connd_peers */ - lnet_nid_t ibp_nid; /* who's on the other end(s) */ - __u32 ibp_ip; /* IP to query for peer conn params */ - int ibp_port; /* port to qery for peer conn params */ - __u64 ibp_incarnation; /* peer's incarnation */ - atomic_t ibp_refcount; /* # users */ - int ibp_persistence; /* "known" peer refs */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - int ibp_connecting; /* current active connection attempts */ - int ibp_accepting; /* current passive connection attempts */ - unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ - unsigned long ibp_reconnect_interval; /* exponential backoff */ - int ibp_error; /* errno on closing this peer */ - cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */ -} kib_peer_t; - -extern kib_data_t kibnal_data; -extern kib_tunables_t kibnal_tunables; 
- -/******************************************************************************/ - -/* these are purposely avoiding using local vars so they don't increase - * stack consumption. */ - -#define kibnal_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kibnal_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - spin_lock_irqsave(&kibnal_data.kib_reaper_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kibnal_data.kib_reaper_conns); \ - wake_up(&kibnal_data.kib_reaper_waitq); \ - spin_unlock_irqrestore(&kibnal_data.kib_reaper_lock, flags); \ - } \ -} while (0) - -#define kibnal_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kibnal_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kibnal_destroy_peer(peer); \ -} while (0) - -/******************************************************************************/ - -static inline struct list_head * -kibnal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - - return (&kibnal_data.kib_peers [hash]); -} - -static inline int -kibnal_peer_active(kib_peer_t *peer) -{ - /* Am I in the peer hash table? 
*/ - return (!list_empty(&peer->ibp_list)); -} - -static inline void -kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nsp > 0); /* work items set up */ - LASSERT (tx->tx_conn == NULL); /* only set here */ - - kibnal_conn_addref(conn); - tx->tx_conn = conn; - tx->tx_deadline = jiffies + *kibnal_tunables.kib_timeout * HZ; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* All messages have simple credit control */ - q = &conn->ibc_tx_queue; - } else { - LASSERT (conn->ibc_version == IBNAL_MSG_VERSION); - - switch (tx->tx_msg->ibm_type) { - case IBNAL_MSG_PUT_RDMA: - case IBNAL_MSG_GET_RDMA: - /* RDMA request: reserve a buffer for the RDMA reply - * before sending */ - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - /* RDMA completion: no credits; peer has reserved a - * reply buffer */ - q = &conn->ibc_tx_queue_nocred; - break; - - case IBNAL_MSG_NOOP: - case IBNAL_MSG_IMMEDIATE: - /* Otherwise: consume a credit before sending */ - q = &conn->ibc_tx_queue; - break; - - default: - LBUG(); - q = NULL; - } - } - - list_add_tail(&tx->tx_list, q); -} - -static inline int -kibnal_send_keepalive(kib_conn_t *conn) -{ - return (*kibnal_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - *kibnal_tunables.kib_keepalive*HZ); -} - -/* CAVEAT EMPTOR: - * We rely on tx/rx descriptor alignment to allow us to use the lowest bit - * of the work request id as a flag to determine if the completion is for a - * transmit or a receive. It seems that that the CQ entry's 'op' field - * isn't always set correctly on completions that occur after QP teardown. */ - -static inline __u64 -kibnal_ptr2wreqid (void *ptr, int isrx) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT ((lptr & 1) == 0); - return (__u64)(lptr | (isrx ? 
1 : 0)); -} - -static inline void * -kibnal_wreqid2ptr (__u64 wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~1UL); -} - -static inline int -kibnal_wreqid_is_rx (__u64 wreqid) -{ - return (wreqid & 1) != 0; -} - -#if (IB_NTXRXPARAMS == 3) -static inline int -kibnal_ib_send(ib_qp_t *qp, struct ib_send_param *p) -{ - return ib_send(qp, p, 1); -} - -static inline int -kibnal_ib_receive(ib_qp_t *qp, struct ib_receive_param *p) -{ - return ib_receive(qp, p, 1); -} -#elif (IB_NTXRXPARAMS == 4) -static inline int -kibnal_ib_send(ib_qp_t *qp, struct ib_send_param *p) -{ - return ib_send(qp, p, 1, NULL); -} - -static inline int -kibnal_ib_receive(ib_qp_t *qp, struct ib_receive_param *p) -{ - return ib_receive(qp, p, 1, NULL); -} -#else - #error "IB_NTXRXPARAMS not set correctly" -#endif - -int kibnal_startup (lnet_ni_t *ni); -void kibnal_shutdown (lnet_ni_t *ni); -int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kibnal_eager_recv (lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kibnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int kibnal_accept(lnet_ni_t *ni, struct socket *sock); - -extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); -extern void kibnal_pack_msg(kib_msg_t *msg, int version, int credits, - lnet_nid_t dstnid, __u64 dststamp); -extern int kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob); -extern void kibnal_handle_svcqry (struct socket *sock); -extern int kibnal_make_svcqry (kib_conn_t *conn); -extern void kibnal_free_acceptsock (kib_acceptsock_t *as); -extern int kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid); -extern void kibnal_destroy_peer (kib_peer_t *peer); -extern int kibnal_add_persistent_peer(lnet_nid_t nid, __u32 ip, int port); -extern int 
kibnal_del_peer (lnet_nid_t nid); -extern kib_peer_t *kibnal_find_peer_locked (lnet_nid_t nid); -extern void kibnal_unlink_peer_locked (kib_peer_t *peer); -extern void kibnal_peer_alive(kib_peer_t *peer); -extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, - __u64 incarnation); -extern kib_conn_t *kibnal_create_conn (void); -extern void kibnal_destroy_conn (kib_conn_t *conn); -extern int kibnal_alloc_pages (kib_pages_t **pp, int npages, int access); -extern void kibnal_free_pages (kib_pages_t *p); - -extern void kibnal_check_sends (kib_conn_t *conn); - -extern tTS_IB_CM_CALLBACK_RETURN -kibnal_bad_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, - void *param, void *arg); -extern tTS_IB_CM_CALLBACK_RETURN -kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, - void *param, void *arg); -extern tTS_IB_CM_CALLBACK_RETURN -kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, - void *param, void *arg); - -extern void kibnal_close_conn_locked (kib_conn_t *conn, int error); -extern void kibnal_destroy_conn (kib_conn_t *conn); -extern int kibnal_thread_start (int (*fn)(void *arg), void *arg); -extern int kibnal_scheduler(void *arg); -extern int kibnal_connd (void *arg); -extern int kibnal_reaper (void *arg); -extern void kibnal_callback (ib_cq_t *cq, struct ib_cq_entry *e, void *arg); -extern void kibnal_txlist_done (struct list_head *txlist, int status); -extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); -extern int kibnal_close_conn (kib_conn_t *conn, int why); -extern void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lnet_msg_t *lntmsg, - unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - int offset, int nob); - -extern int kibnal_tunables_init(void); -extern void kibnal_tunables_fini(void); diff --git a/lnet/klnds/openiblnd/openiblnd_cb.c b/lnet/klnds/openiblnd/openiblnd_cb.c deleted file mode 100644 index 
a231e5ee981e3987656d2b551752b5f6dadb1192..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd_cb.c +++ /dev/null @@ -1,2612 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "openiblnd.h" - -/* - * LIB functions follow - * - */ -void -kibnal_schedule_tx_done (kib_tx_t *tx) -{ - unsigned long flags; - - spin_lock_irqsave (&kibnal_data.kib_sched_lock, flags); - - list_add_tail(&tx->tx_list, &kibnal_data.kib_sched_txq); - wake_up (&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); -} - -void -kibnal_tx_done (kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - unsigned long flags; - int i; - int rc; - - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting callback */ - LASSERT (!tx->tx_passive_rdma_wait); /* mustn't be awaiting RDMA */ - - if (in_interrupt()) { - /* can't deregister memory/flush FMAs/finalize in IRQ context... 
*/ - kibnal_schedule_tx_done(tx); - return; - } - - switch (tx->tx_mapped) { - default: - LBUG(); - - case KIB_TX_UNMAPPED: - break; - - case KIB_TX_MAPPED: - rc = ib_memory_deregister(tx->tx_md.md_handle.mr); - LASSERT (rc == 0); - tx->tx_mapped = KIB_TX_UNMAPPED; - break; - -#if IBNAL_FMR - case KIB_TX_MAPPED_FMR: - rc = ib_fmr_deregister(tx->tx_md.md_handle.fmr); - LASSERT (rc == 0); - -#ifndef USING_TSAPI - /* Somewhat belt-and-braces since the tx's conn has closed if - * this was a passive RDMA waiting to complete... */ - if (tx->tx_status != 0) - ib_fmr_pool_force_flush(kibnal_data.kib_fmr_pool); -#endif - tx->tx_mapped = KIB_TX_UNMAPPED; - break; -#endif - } - - /* tx may have up to 2 ptlmsgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - rc = tx->tx_status; - - if (tx->tx_conn != NULL) { - kibnal_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nsp = 0; - tx->tx_passive_rdma = 0; - tx->tx_status = 0; - - spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags); - - list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_txs); - - spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) - continue; - - lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc); - } -} - -kib_tx_t * -kibnal_get_idle_tx (void) -{ - unsigned long flags; - kib_tx_t *tx; - - spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags); - - if (list_empty (&kibnal_data.kib_idle_txs)) { - spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); - return NULL; - } - - tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - /* Allocate a new passive RDMA completion cookie. It might not be - * needed, but we've got a lock right now and we're unlikely to - * wrap... 
*/ - tx->tx_passive_rdma_cookie = kibnal_data.kib_next_tx_cookie++; - - spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); - - LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); - LASSERT (tx->tx_nsp == 0); - LASSERT (tx->tx_sending == 0); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (!tx->tx_passive_rdma); - LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -void -kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status) -{ - struct list_head *ttmp; - unsigned long flags; - int idle; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - list_for_each (ttmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(ttmp, kib_tx_t, tx_list); - - LASSERT (tx->tx_passive_rdma || - !tx->tx_passive_rdma_wait); - - LASSERT (tx->tx_passive_rdma_wait || - tx->tx_sending != 0); - - if (!tx->tx_passive_rdma_wait || - tx->tx_passive_rdma_cookie != cookie) - continue; - - CDEBUG(D_NET, "Complete %p "LPD64": %d\n", tx, cookie, status); - - /* XXX Set mlength of reply here */ - - tx->tx_status = status; - tx->tx_passive_rdma_wait = 0; - idle = (tx->tx_sending == 0); - - if (idle) - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - /* I could be racing with tx callbacks. It's whoever - * _makes_ tx idle that frees it */ - if (idle) - kibnal_tx_done (tx); - return; - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - CERROR ("Unmatched (late?) 
RDMA completion "LPX64" from %s\n", - cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); -} - -void -kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) -{ - kib_conn_t *conn = rx->rx_conn; - int rc; - unsigned long flags; - - LASSERT(!rsrvd_credit || - conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - - rx->rx_gl = (struct ib_gather_scatter) { - .address = rx->rx_vaddr, - .length = IBNAL_MSG_SIZE, - .key = conn->ibc_rx_pages->ibp_lkey, - }; - - rx->rx_sp = (struct ib_receive_param) { - .work_request_id = kibnal_ptr2wreqid(rx, 1), - .scatter_list = &rx->rx_gl, - .num_scatter_entries = 1, - .device_specific = NULL, - .signaled = 1, - }; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - LASSERT (rx->rx_nob >= 0); /* not posted */ - rx->rx_nob = -1; /* is now */ - mb(); - - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) - rc = -ECONNABORTED; - else - rc = kibnal_ib_receive(conn->ibc_qp, &rx->rx_sp); - - if (rc == 0) { - if (credit || rsrvd_credit) { - spin_lock_irqsave(&conn->ibc_lock, flags); - - if (credit) - conn->ibc_outstanding_credits++; - if (rsrvd_credit) - conn->ibc_reserved_credits++; - - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - kibnal_check_sends(conn); - } - return; - } - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - CERROR ("Error posting receive -> %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_close_conn (rx->rx_conn, rc); - } else { - CDEBUG (D_NET, "Error posting receive -> %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - } - - /* Drop rx's ref */ - kibnal_conn_decref(conn); -} - -void -kibnal_rx_callback (struct ib_cq_entry *e) -{ - kib_rx_t *rx = (kib_rx_t *)kibnal_wreqid2ptr(e->work_request_id); - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - int credits; - unsigned long flags; - int rc; - int err = -ECONNABORTED; - - CDEBUG (D_NET, "rx %p conn %p\n", rx, conn); - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - mb(); - - 
/* receives complete with error in any case after we've started - * closing the QP */ - if (conn->ibc_state >= IBNAL_CONN_DEATHROW) - goto failed; - - /* We don't post receives until the conn is established */ - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - if (e->status != IB_COMPLETION_STATUS_SUCCESS) { - CERROR("Rx from %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), e->status); - goto failed; - } - - LASSERT (e->bytes_transferred >= 0); - rx->rx_nob = e->bytes_transferred; - mb(); - - rc = kibnal_unpack_msg(msg, conn->ibc_version, rx->rx_nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid, - msg->ibm_srcnid) || - !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - /* Have I received credits that will let me send? 
*/ - credits = msg->ibm_credits; - if (credits != 0) { - spin_lock_irqsave(&conn->ibc_lock, flags); - conn->ibc_credits += credits; - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - kibnal_check_sends(conn); - } - - switch (msg->ibm_type) { - case IBNAL_MSG_NOOP: - kibnal_post_rx (rx, 1, 0); - return; - - case IBNAL_MSG_IMMEDIATE: - break; - - case IBNAL_MSG_PUT_RDMA: - case IBNAL_MSG_GET_RDMA: - CDEBUG(D_NET, "%d RDMA: cookie "LPX64", key %x, addr "LPX64", nob %d\n", - msg->ibm_type, msg->ibm_u.rdma.ibrm_cookie, - msg->ibm_u.rdma.ibrm_desc.rd_key, - msg->ibm_u.rdma.ibrm_desc.rd_addr, - msg->ibm_u.rdma.ibrm_desc.rd_nob); - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - CDEBUG(D_NET, "%d DONE: cookie "LPX64", status %d\n", - msg->ibm_type, msg->ibm_u.completion.ibcm_cookie, - msg->ibm_u.completion.ibcm_status); - - kibnal_complete_passive_rdma (conn, - msg->ibm_u.completion.ibcm_cookie, - msg->ibm_u.completion.ibcm_status); - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - kibnal_post_rx (rx, 1, 0); - } else { - /* this reply buffer was pre-reserved */ - kibnal_post_rx (rx, 0, 1); - } - return; - - default: - CERROR ("Bad msg type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - kibnal_peer_alive(conn->ibc_peer); - - /* schedule for kibnal_rx() in thread context */ - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - - list_add_tail (&rx->rx_list, &kibnal_data.kib_sched_rxq); - wake_up (&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kibnal_close_conn(conn, err); - - /* Don't re-post rx & drop its ref on conn */ - kibnal_conn_decref(conn); -} - -void -kibnal_rx (kib_rx_t *rx) -{ - int rc = 0; - kib_msg_t *msg = rx->rx_msg; - - switch (msg->ibm_type) { - case IBNAL_MSG_GET_RDMA: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, - 
msg->ibm_srcnid, rx, 1); - break; - - case IBNAL_MSG_PUT_RDMA: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, - msg->ibm_srcnid, rx, 1); - break; - - case IBNAL_MSG_IMMEDIATE: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - break; - - default: - LBUG(); - break; - } - - if (rc < 0) { - kibnal_close_conn(rx->rx_conn, rc); - kibnal_post_rx (rx, 1, 0); - } -} - -#if 0 -int -kibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) - page = vmalloc_to_page ((void *)vaddr); -#ifdef CONFIG_HIGHMEM - else if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) - page = vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ -#endif - else - page = virt_to_page (vaddr); - - if (page == NULL || - !VALID_PAGE (page)) - return (-EFAULT); - - *physp = lnet_page2phys(page) + (vaddr & (PAGE_SIZE - 1)); - return (0); -} -#endif - -int -kibnal_map_iov (kib_tx_t *tx, int access, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - void *vaddr; - int rc; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = (void *)(((unsigned long)iov->iov_base) + offset); - tx->tx_md.md_addr = (__u64)((unsigned long)vaddr); - - rc = ib_memory_register (kibnal_data.kib_pd, - vaddr, nob, - access, - &tx->tx_md.md_handle.mr, - &tx->tx_md.md_lkey, - &tx->tx_md.md_rkey); - - if (rc != 0) { - CERROR ("Can't map vaddr: %d\n", rc); - return (rc); - } - - tx->tx_mapped = KIB_TX_MAPPED; - return (0); -} - -int -kibnal_map_kiov (kib_tx_t *tx, int access, - int nkiov, lnet_kiov_t *kiov, - int offset, int nob) -{ -#if IBNAL_FMR - __u64 
*phys; - const int mapped = KIB_TX_MAPPED_FMR; -#else - struct ib_physical_buffer *phys; - const int mapped = KIB_TX_MAPPED; -#endif - int page_offset; - int nphys; - int resid; - int phys_size; - int rc; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - phys_size = nkiov * sizeof (*phys); - LIBCFS_ALLOC(phys, phys_size); - if (phys == NULL) { - CERROR ("Can't allocate tmp phys\n"); - return (-ENOMEM); - } - - page_offset = kiov->kiov_offset + offset; -#if IBNAL_FMR - phys[0] = lnet_page2phys(kiov->kiov_page); -#else - phys[0].address = lnet_page2phys(kiov->kiov_page); - phys[0].size = PAGE_SIZE; -#endif - nphys = 1; - resid = nob - (kiov->kiov_len - offset); - - while (resid > 0) { - kiov++; - nkiov--; - LASSERT (nkiov > 0); - - if (kiov->kiov_offset != 0 || - ((resid > PAGE_SIZE) && - kiov->kiov_len < PAGE_SIZE)) { - int i; - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, len %d \n", nphys, - kiov->kiov_offset, kiov->kiov_len); - - for (i = -nphys; i < nkiov; i++) - { - CERROR("kiov[%d] %p +%d for %d\n", - i, kiov[i].kiov_page, kiov[i].kiov_offset, kiov[i].kiov_len); - } - - rc = -EINVAL; - goto out; - } - - if (nphys == LNET_MAX_IOV) { - CERROR ("payload too big (%d)\n", nphys); - rc = -EMSGSIZE; - goto out; - } - - LASSERT (nphys * sizeof (*phys) < phys_size); -#if IBNAL_FMR - phys[nphys] = lnet_page2phys(kiov->kiov_page); -#else - phys[nphys].address = lnet_page2phys(kiov->kiov_page); - phys[nphys].size = PAGE_SIZE; -#endif - nphys++; - - resid -= PAGE_SIZE; - } - - tx->tx_md.md_addr = IBNAL_RDMA_BASE; - -#if IBNAL_FMR - rc = ib_fmr_register_physical (kibnal_data.kib_fmr_pool, - phys, nphys, - &tx->tx_md.md_addr, - page_offset, - &tx->tx_md.md_handle.fmr, - &tx->tx_md.md_lkey, - 
&tx->tx_md.md_rkey); -#else - rc = ib_memory_register_physical (kibnal_data.kib_pd, - phys, nphys, - &tx->tx_md.md_addr, - nob, page_offset, - access, - &tx->tx_md.md_handle.mr, - &tx->tx_md.md_lkey, - &tx->tx_md.md_rkey); -#endif - if (rc == 0) { - CDEBUG(D_NET, "Mapped %d pages %d bytes @ offset %d: lkey %x, rkey %x\n", - nphys, nob, page_offset, tx->tx_md.md_lkey, tx->tx_md.md_rkey); - tx->tx_mapped = mapped; - } else { - CERROR ("Can't map phys: %d\n", rc); - rc = -EFAULT; - } - - out: - LIBCFS_FREE(phys, phys_size); - return (rc); -} - -kib_conn_t * -kibnal_find_conn_locked (kib_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->ibp_conns) { - return (list_entry(tmp, kib_conn_t, ibc_list)); - } - - return (NULL); -} - -void -kibnal_check_sends (kib_conn_t *conn) -{ - unsigned long flags; - kib_tx_t *tx; - int rc; - int i; - int consume_credit; - int done; - int nwork; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - LASSERT (conn->ibc_nsends_posted <= IBNAL_RX_MSGS); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_nocred) && - (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER || - kibnal_send_keepalive(conn))) { - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - tx = kibnal_get_idle_tx(); - if (tx != NULL) - kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0); - - spin_lock_irqsave(&conn->ibc_lock, flags); - - if (tx != NULL) - kibnal_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - LASSERT (conn->ibc_version != - 
IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_credit = 0; - } else if (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_credit = 1; - } else { - /* nothing waiting */ - break; - } - - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nsp > 0 && tx->tx_nsp <= 2); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); - - /* Not on ibc_rdma_queue */ - LASSERT (!tx->tx_passive_rdma_wait); - - if (conn->ibc_nsends_posted == IBNAL_RX_MSGS) - break; - - if (consume_credit) { - if (conn->ibc_credits == 0) /* no credits */ - break; - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) /* giving back credits */ - break; - } - - list_del (&tx->tx_list); - - if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP && - (!list_empty(&conn->ibc_tx_queue) || - !list_empty(&conn->ibc_tx_queue_nocred) || - (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER && - !kibnal_send_keepalive(conn)))) { - /* redundant NOOP */ - spin_unlock_irqrestore(&conn->ibc_lock, flags); - kibnal_tx_done(tx); - spin_lock_irqsave(&conn->ibc_lock, flags); - continue; - } - - kibnal_pack_msg(tx->tx_msg, conn->ibc_version, - conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_credit) - conn->ibc_credits--; - - tx->tx_sending = tx->tx_nsp; - tx->tx_passive_rdma_wait = tx->tx_passive_rdma; - list_add (&tx->tx_list, &conn->ibc_active_txs); - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - /* NB the gap between removing tx from the queue and sending it - * allows message re-ordering to occur */ - - LASSERT (tx->tx_nsp > 0); - - rc = -ECONNABORTED; - nwork = 0; - if 
(conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - tx->tx_status = 0; - /* Driver only accepts 1 item at a time */ - for (i = 0; i < tx->tx_nsp; i++) { - rc = kibnal_ib_send(conn->ibc_qp, &tx->tx_sp[i]); - if (rc != 0) - break; - nwork++; - } - } - - conn->ibc_last_send = jiffies; - - spin_lock_irqsave (&conn->ibc_lock, flags); - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_credit) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_passive_rdma_wait = 0; - tx->tx_sending -= tx->tx_nsp - nwork; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CERROR ("Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG (D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_close_conn (conn, rc); - - if (done) - kibnal_tx_done (tx); - return; - } - - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); -} - -void -kibnal_tx_callback (struct ib_cq_entry *e) -{ - kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(e->work_request_id); - kib_conn_t *conn; - unsigned long flags; - int idle; - - conn = tx->tx_conn; - LASSERT (conn != NULL); - LASSERT (tx->tx_sending != 0); - - spin_lock_irqsave(&conn->ibc_lock, flags); - - CDEBUG(D_NET, "conn %p tx %p [%d/%d]: %d\n", conn, tx, - tx->tx_nsp - tx->tx_sending, tx->tx_nsp, - e->status); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. If it's - * not me, then I take an extra ref on conn so it can't disappear - * under me. 
*/ - - tx->tx_sending--; - idle = (tx->tx_sending == 0) && /* This is the final callback */ - (!tx->tx_passive_rdma_wait); /* Not waiting for RDMA completion */ - if (idle) - list_del(&tx->tx_list); - - kibnal_conn_addref(conn); - - if (tx->tx_sending == 0) - conn->ibc_nsends_posted--; - - if (e->status != IB_COMPLETION_STATUS_SUCCESS && - tx->tx_status == 0) - tx->tx_status = -ECONNABORTED; - - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - if (idle) - kibnal_tx_done (tx); - - if (e->status != IB_COMPLETION_STATUS_SUCCESS) { - CDEBUG (D_NETERROR, "Tx completion to %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), e->status); - kibnal_close_conn (conn, -ENETDOWN); - } else { - kibnal_peer_alive(conn->ibc_peer); - /* can I shovel some more sends out the door? */ - kibnal_check_sends(conn); - } - - kibnal_conn_decref(conn); -} - -void -kibnal_callback (ib_cq_t *cq, struct ib_cq_entry *e, void *arg) -{ - if (kibnal_wreqid_is_rx(e->work_request_id)) - kibnal_rx_callback (e); - else - kibnal_tx_callback (e); -} - -void -kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) -{ - struct ib_gather_scatter *gl = &tx->tx_gl[tx->tx_nsp]; - struct ib_send_param *sp = &tx->tx_sp[tx->tx_nsp]; - int fence; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - - LASSERT (tx->tx_nsp >= 0 && - tx->tx_nsp < sizeof(tx->tx_sp)/sizeof(tx->tx_sp[0])); - LASSERT (nob <= IBNAL_MSG_SIZE); - - kibnal_init_msg(tx->tx_msg, type, body_nob); - - /* Fence the message if it's bundled with an RDMA read */ - fence = (tx->tx_nsp > 0) && - (type == IBNAL_MSG_PUT_DONE); - - *gl = (struct ib_gather_scatter) { - .address = tx->tx_vaddr, - .length = nob, - .key = kibnal_data.kib_tx_pages->ibp_lkey, - }; - - /* NB If this is an RDMA read, the completion message must wait for - * the RDMA to complete. Sends wait for previous RDMA writes - * anyway... 
*/ - *sp = (struct ib_send_param) { - .work_request_id = kibnal_ptr2wreqid(tx, 0), - .op = IB_OP_SEND, - .gather_list = gl, - .num_gather_entries = 1, - .device_specific = NULL, - .solicited_event = 1, - .signaled = 1, - .immediate_data_valid = 0, - .fence = fence, - .inline_data = 0, - }; - - tx->tx_nsp++; -} - -void -kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - unsigned long flags; - - spin_lock_irqsave(&conn->ibc_lock, flags); - - kibnal_queue_tx_locked (tx, conn); - - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - kibnal_check_sends(conn); -} - -void -kibnal_schedule_active_connect_locked (kib_peer_t *peer) -{ - /* Called with exclusive kib_global_lock */ - - peer->ibp_connecting++; - kibnal_peer_addref(peer); /* extra ref for connd */ - - spin_lock (&kibnal_data.kib_connd_lock); - - LASSERT (list_empty(&peer->ibp_connd_list)); - list_add_tail (&peer->ibp_connd_list, - &kibnal_data.kib_connd_peers); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock (&kibnal_data.kib_connd_lock); -} - -void -kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - kib_conn_t *conn; - int retry; - int rc; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nsp > 0); /* work items have been set up */ - - for (retry = 0; ; retry = 1) { - read_lock_irqsave(g_lock, flags); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) { - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - kibnal_conn_addref(conn); /* 1 ref for me...*/ - read_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - } - - /* Making one or more connections; I'll need a write lock... 
*/ - read_unlock(g_lock); - write_lock(g_lock); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) - break; - - write_unlock_irqrestore (g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - tx->tx_status = -EHOSTUNREACH; - kibnal_tx_done (tx); - return; - } - - rc = kibnal_add_persistent_peer(nid, LNET_NIDADDR(nid), - lnet_acceptor_port()); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - tx->tx_status = rc; - kibnal_tx_done(tx); - return; - } - } - - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kibnal_conn_addref(conn); /* +1 ref from me... */ - write_unlock_irqrestore (g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - - if (peer->ibp_connecting == 0 && - peer->ibp_accepting == 0) { - if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->ibp_reconnect_time))) { - write_unlock_irqrestore (g_lock, flags); - tx->tx_status = -EHOSTUNREACH; - kibnal_tx_done (tx); - return; - } - - kibnal_schedule_active_connect_locked(peer); - } - - /* A connection is being established; queue the message... 
*/ - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - - write_unlock_irqrestore (g_lock, flags); -} - -void -kibnal_txlist_done (struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty(txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_status = status; - kibnal_tx_done (tx); - } -} - -int -kibnal_start_passive_rdma (int type, lnet_msg_t *lntmsg, - int niov, struct iovec *iov, lnet_kiov_t *kiov, - int nob) -{ - lnet_nid_t nid = lntmsg->msg_target.nid; - kib_tx_t *tx; - kib_msg_t *ibmsg; - int rc; - int access; - - LASSERT (type == IBNAL_MSG_PUT_RDMA || - type == IBNAL_MSG_GET_RDMA); - LASSERT (nob > 0); - LASSERT (!in_interrupt()); /* Mapping could block */ - - if (type == IBNAL_MSG_PUT_RDMA) { - access = IB_ACCESS_REMOTE_READ; - } else { - access = IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE; - } - - tx = kibnal_get_idle_tx (); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - (type == IBNAL_MSG_PUT_RDMA) ? 
"PUT/REPLY" : "GET", - libcfs_nid2str(nid)); - return -ENOMEM; - } - - - if (iov != NULL) - rc = kibnal_map_iov (tx, access, niov, iov, 0, nob); - else - rc = kibnal_map_kiov (tx, access, niov, kiov, 0, nob); - - if (rc != 0) { - CERROR ("Can't map RDMA for %s: %d\n", - libcfs_nid2str(nid), rc); - goto failed; - } - - if (type == IBNAL_MSG_GET_RDMA) { - /* reply gets finalized when tx completes */ - tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni, - lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR ("Can't create reply for GET -> %s\n", - libcfs_nid2str(nid)); - rc = -ENOMEM; - goto failed; - } - } - - tx->tx_passive_rdma = 1; - - ibmsg = tx->tx_msg; - - ibmsg->ibm_u.rdma.ibrm_hdr = lntmsg->msg_hdr; - ibmsg->ibm_u.rdma.ibrm_cookie = tx->tx_passive_rdma_cookie; - ibmsg->ibm_u.rdma.ibrm_desc.rd_key = tx->tx_md.md_rkey; - ibmsg->ibm_u.rdma.ibrm_desc.rd_addr = tx->tx_md.md_addr; - ibmsg->ibm_u.rdma.ibrm_desc.rd_nob = nob; - - kibnal_init_tx_msg (tx, type, sizeof (kib_rdma_msg_t)); - - CDEBUG(D_NET, "Passive: %p cookie "LPX64", key %x, addr " - LPX64", nob %d\n", - tx, tx->tx_passive_rdma_cookie, tx->tx_md.md_rkey, - tx->tx_md.md_addr, nob); - - /* lntmsg gets finalized when tx completes. 
*/ - tx->tx_lntmsg[0] = lntmsg; - - kibnal_launch_tx(tx, nid); - return (0); - - failed: - tx->tx_status = rc; - kibnal_tx_done (tx); - return (-EIO); -} - -void -kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lnet_msg_t *lntmsg, - unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - int offset, int nob) -{ - kib_msg_t *rxmsg = rx->rx_msg; - kib_msg_t *txmsg; - kib_tx_t *tx; - int access; - int rdma_op; - int rc; - - CDEBUG(D_NET, "type %d, status %d, niov %d, offset %d, nob %d\n", - type, status, niov, offset, nob); - - /* Called by scheduler */ - LASSERT (!in_interrupt ()); - - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - /* No data if we're completing with failure */ - LASSERT (status == 0 || nob == 0); - - LASSERT (type == IBNAL_MSG_GET_DONE || - type == IBNAL_MSG_PUT_DONE); - - if (type == IBNAL_MSG_GET_DONE) { - access = 0; - rdma_op = IB_OP_RDMA_WRITE; - LASSERT (rxmsg->ibm_type == IBNAL_MSG_GET_RDMA); - } else { - access = IB_ACCESS_LOCAL_WRITE; - rdma_op = IB_OP_RDMA_READ; - LASSERT (rxmsg->ibm_type == IBNAL_MSG_PUT_RDMA); - } - - tx = kibnal_get_idle_tx (); - if (tx == NULL) { - CERROR ("tx descs exhausted on RDMA from %s" - " completing locally with failure\n", - libcfs_nid2str(rx->rx_conn->ibc_peer->ibp_nid)); - lnet_finalize (kibnal_data.kib_ni, lntmsg, -ENOMEM); - return; - } - LASSERT (tx->tx_nsp == 0); - - if (nob != 0) { - /* We actually need to transfer some data (the transfer - * size could get truncated to zero when the incoming - * message is matched) */ - - if (kiov != NULL) - rc = kibnal_map_kiov (tx, access, - niov, kiov, offset, nob); - else - rc = kibnal_map_iov (tx, access, - niov, iov, offset, nob); - - if (rc != 0) { - CERROR ("Can't map RDMA -> %s: %d\n", - libcfs_nid2str(rx->rx_conn->ibc_peer->ibp_nid), - rc); - /* We'll skip the RDMA and complete with failure. 
*/ - status = rc; - nob = 0; - } else { - tx->tx_gl[0] = (struct ib_gather_scatter) { - .address = tx->tx_md.md_addr, - .length = nob, - .key = tx->tx_md.md_lkey, - }; - - tx->tx_sp[0] = (struct ib_send_param) { - .work_request_id = kibnal_ptr2wreqid(tx, 0), - .op = rdma_op, - .gather_list = &tx->tx_gl[0], - .num_gather_entries = 1, - .remote_address = rxmsg->ibm_u.rdma.ibrm_desc.rd_addr, - .rkey = rxmsg->ibm_u.rdma.ibrm_desc.rd_key, - .device_specific = NULL, - .solicited_event = 0, - .signaled = 1, - .immediate_data_valid = 0, - .fence = 0, - .inline_data = 0, - }; - - tx->tx_nsp = 1; - } - } - - txmsg = tx->tx_msg; - - txmsg->ibm_u.completion.ibcm_cookie = rxmsg->ibm_u.rdma.ibrm_cookie; - txmsg->ibm_u.completion.ibcm_status = status; - - kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t)); - - if (status == 0 && nob != 0) { - LASSERT (tx->tx_nsp > 1); - /* RDMA: lntmsg gets finalized when the tx completes. This - * is after the completion message has been sent, which in - * turn is after the RDMA has finished. */ - tx->tx_lntmsg[0] = lntmsg; - } else { - LASSERT (tx->tx_nsp == 1); - /* No RDMA: local completion happens now! */ - CDEBUG(D_NET, "No data: immediate completion\n"); - lnet_finalize (kibnal_data.kib_ni, lntmsg, - status == 0 ? 0 : -EIO); - } - - kibnal_queue_tx(tx, rx->rx_conn); -} - -int -kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - - /* NB 'private' is different depending on what we're sending.... 
*/ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context if we're sending payload */ - LASSERT (!in_interrupt() || payload_niov == 0); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, NULL, - lntmsg->msg_md->md_length); - - return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg, - lntmsg->msg_md->md_niov, - NULL, lntmsg->msg_md->md_iov.kiov, - lntmsg->msg_md->md_length); - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - return kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA, lntmsg, - payload_niov, - payload_iov, payload_kiov, - payload_nob); - } - - /* Send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted%s\n", - type, libcfs_nid2str(target.nid), - in_interrupt() ? 
" (intr)" : ""); - return (-ENOMEM); - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_iov, - payload_offset, payload_nob); - - kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, - offsetof(kib_immediate_msg_t, - ibim_payload[payload_nob])); - - /* lntmsg gets finalized when tx completes */ - tx->tx_lntmsg[0] = lntmsg; - - kibnal_launch_tx(tx, target.nid); - return (0); -} - -int -kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void **new_private) -{ - kib_rx_t *rx = private; - kib_conn_t *conn = rx->rx_conn; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* Can't block if RDMA completions need normal credits */ - LCONSOLE_ERROR_MSG(0x12a, - "Dropping message from %s: no buffers free. 
" - "%s is running an old version of LNET that may " - "deadlock if messages wait for buffers)\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return -EDEADLK; - } - - *new_private = private; - return 0; -} - -int -kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - int msg_nob; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt ()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBNAL_MSG_IMMEDIATE: - msg_nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (msg_nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - msg_nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov( - niov, kiov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov( - niov, iov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - - lnet_finalize (ni, lntmsg, 0); - break; - - case IBNAL_MSG_GET_RDMA: - if (lntmsg != NULL) { - /* GET matched: RDMA lntmsg's payload */ - kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0, - rx, lntmsg, - lntmsg->msg_niov, - lntmsg->msg_iov, - lntmsg->msg_kiov, - lntmsg->msg_offset, - lntmsg->msg_len); - } else { - /* GET didn't match anything */ - kibnal_start_active_rdma (IBNAL_MSG_GET_DONE, -ENODATA, - rx, NULL, 0, NULL, NULL, 0, 0); - } - break; - - case IBNAL_MSG_PUT_RDMA: - kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0, rx, lntmsg, - niov, iov, kiov, offset, mlen); - break; - } - - kibnal_post_rx(rx, 1, 0); - return rc; -} - -int -kibnal_thread_start 
(int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kibnal_data.kib_nthreads); - return (0); -} - -void -kibnal_thread_fini (void) -{ - atomic_dec (&kibnal_data.kib_nthreads); -} - -void -kibnal_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kibnal_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive); -} - -void -kibnal_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immmediate housekeeping, and schedules the - * connection for the reaper to finish off. - * Caller holds kib_global_lock exclusively in irq context */ - kib_peer_t *peer = conn->ibc_peer; - - CDEBUG (error == 0 ? 
D_NET : D_NETERROR, - "closing conn to %s: error %d\n", - libcfs_nid2str(peer->ibp_nid), error); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED || - conn->ibc_state == IBNAL_CONN_CONNECTING); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - /* kib_reaper_conns takes ibc_list's ref */ - list_del (&conn->ibc_list); - } else { - /* new ref for kib_reaper_conns */ - kibnal_conn_addref(conn); - } - - if (list_empty (&peer->ibp_conns)) { /* no more conns */ - if (peer->ibp_persistence == 0 && /* non-persistent peer */ - kibnal_peer_active(peer)) /* still in peer table */ - kibnal_unlink_peer_locked (peer); - - peer->ibp_error = error; /* set/clear error on last conn */ - } - - conn->ibc_state = IBNAL_CONN_DEATHROW; - - /* Schedule conn for closing/destruction */ - spin_lock (&kibnal_data.kib_reaper_lock); - - list_add_tail (&conn->ibc_list, &kibnal_data.kib_reaper_conns); - wake_up (&kibnal_data.kib_reaper_waitq); - - spin_unlock (&kibnal_data.kib_reaper_lock); -} - -int -kibnal_close_conn (kib_conn_t *conn, int why) -{ - unsigned long flags; - int count = 0; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING); - - if (conn->ibc_state <= IBNAL_CONN_ESTABLISHED) { - count = 1; - kibnal_close_conn_locked (conn, why); - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return (count); -} - -void -kibnal_peer_connect_failed (kib_peer_t *peer, int active, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - LASSERT(error != 0); - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (active) { - LASSERT (peer->ibp_connecting != 0); - peer->ibp_connecting--; - } else { - LASSERT (peer->ibp_accepting != 0); - peer->ibp_accepting--; - } - - if (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0) { - /* another connection attempt under way... 
*/ - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return; - } - - if (list_empty(&peer->ibp_conns)) { - /* Say when active connection can be re-attempted */ - peer->ibp_reconnect_interval *= 2; - peer->ibp_reconnect_interval = - MAX(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_min_reconnect_interval); - peer->ibp_reconnect_interval = - MIN(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_max_reconnect_interval); - - peer->ibp_reconnect_time = jiffies + - peer->ibp_reconnect_interval * HZ; - - /* Take peer's blocked transmits; I'll complete - * them with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kibnal_peer_active(peer) && - (peer->ibp_persistence == 0)) { - /* failed connection attempt on non-persistent peer */ - kibnal_unlink_peer_locked (peer); - } - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT (list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_peer_notify(peer); - - if (!list_empty (&zombies)) - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_txlist_done(&zombies, -EHOSTUNREACH); -} - -void -kibnal_connreq_done (kib_conn_t *conn, int active, int status) -{ - int state = conn->ibc_state; - kib_peer_t *peer = conn->ibc_peer; - kib_tx_t *tx; - unsigned long flags; - int rc; - int i; - - if (conn->ibc_connreq != NULL) { - LIBCFS_FREE (conn->ibc_connreq, sizeof (*conn->ibc_connreq)); - conn->ibc_connreq = NULL; - } - - switch (state) { - case IBNAL_CONN_CONNECTING: - /* conn has a CM comm_id */ - if (status == 0) { - /* Install common (active/passive) callback for - * disconnect/idle notification */ - rc = tsIbCmCallbackModify(conn->ibc_comm_id, - kibnal_conn_callback, - conn); - LASSERT (rc == 0); - } else { - /* LASSERT (no more CM callbacks) */ - rc = 
tsIbCmCallbackModify(conn->ibc_comm_id, - kibnal_bad_conn_callback, - conn); - LASSERT (rc == 0); - } - break; - - case IBNAL_CONN_INIT_QP: - LASSERT (status != 0); - break; - - default: - LBUG(); - } - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (active) - LASSERT (peer->ibp_connecting != 0); - else - LASSERT (peer->ibp_accepting != 0); - - if (status == 0 && /* connection established */ - kibnal_peer_active(peer)) { /* peer not deleted */ - - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - conn->ibc_last_send = jiffies; - conn->ibc_state = IBNAL_CONN_ESTABLISHED; - kibnal_peer_alive(peer); - - /* +1 ref for ibc_list; caller(== CM)'s ref remains until - * the IB_CM_IDLE callback */ - kibnal_conn_addref(conn); - list_add (&conn->ibc_list, &peer->ibp_conns); - - peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */ - - /* post blocked sends to the new connection */ - spin_lock (&conn->ibc_lock); - - while (!list_empty (&peer->ibp_tx_queue)) { - tx = list_entry (peer->ibp_tx_queue.next, - kib_tx_t, tx_list); - - list_del (&tx->tx_list); - - kibnal_queue_tx_locked (tx, conn); - } - - spin_unlock (&conn->ibc_lock); - - /* Nuke any dangling conns from a different peer instance... */ - kibnal_close_stale_conns_locked (conn->ibc_peer, - conn->ibc_incarnation); - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - /* queue up all the receives */ - for (i = 0; i < IBNAL_RX_MSGS; i++) { - /* +1 ref for rx desc */ - kibnal_conn_addref(conn); - - CDEBUG(D_NET, "RX[%d] %p->%p - "LPX64"\n", - i, &conn->ibc_rxs[i], conn->ibc_rxs[i].rx_msg, - conn->ibc_rxs[i].rx_vaddr); - - kibnal_post_rx (&conn->ibc_rxs[i], 0, 0); - } - - kibnal_check_sends (conn); - return; - } - - if (status == 0) { - /* connection established, but peer was deleted. Schedule for - * reaper to cm_disconnect... 
*/ - status = -ECONNABORTED; - kibnal_close_conn_locked (conn, status); - } else { - /* just waiting for refs to drain */ - conn->ibc_state = IBNAL_CONN_ZOMBIE; - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_peer_connect_failed (conn->ibc_peer, active, status); -} - -int -kibnal_accept_connreq (kib_conn_t **connp, tTS_IB_CM_COMM_ID cid, - kib_msg_t *msg, int nob) -{ - kib_conn_t *conn; - kib_peer_t *peer; - kib_peer_t *peer2; - unsigned long flags; - int rc; - - rc = kibnal_unpack_msg(msg, 0, nob); - if (rc != 0) { - CERROR("Can't unpack connreq msg: %d\n", rc); - return -EPROTO; - } - - CDEBUG(D_NET, "connreq from %s\n", libcfs_nid2str(msg->ibm_srcnid)); - - if (msg->ibm_type != IBNAL_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - msg->ibm_type, libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR("Can't accept %s: bad queue depth %d (%d expected)\n", - libcfs_nid2str(msg->ibm_srcnid), - msg->ibm_u.connparams.ibcp_queue_depth, - IBNAL_MSG_QUEUE_SIZE); - return (-EPROTO); - } - - conn = kibnal_create_conn(); - if (conn == NULL) - return (-ENOMEM); - - /* assume 'nid' is a new peer */ - rc = kibnal_create_peer(&peer, msg->ibm_srcnid); - if (rc != 0) { - kibnal_conn_decref(conn); - return (-ENOMEM); - } - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (kibnal_data.kib_nonewpeers) { - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - CERROR ("Shutdown has started, drop connreq from %s\n", - libcfs_nid2str(msg->ibm_srcnid)); - kibnal_conn_decref(conn); - kibnal_peer_decref(peer); - return -ESHUTDOWN; - } - - /* Check I'm the same instance that gave the connection parameters. 
- * NB If my incarnation changes after this, the peer will get nuked and - * we'll spot that when the connection is finally added into the peer's - * connlist */ - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - CERROR("Stale connection params from %s\n", - libcfs_nid2str(msg->ibm_srcnid)); - kibnal_conn_decref(conn); - kibnal_peer_decref(peer); - return -ESTALE; - } - - peer2 = kibnal_find_peer_locked(msg->ibm_srcnid); - if (peer2 == NULL) { - /* Brand new peer */ - LASSERT (peer->ibp_accepting == 0); - - /* peer table takes my ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist(msg->ibm_srcnid)); - } else { - /* tie-break connection race in favour of the higher NID */ - if (peer2->ibp_connecting != 0 && - msg->ibm_srcnid < kibnal_data.kib_ni->ni_nid) { - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - CWARN("Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kibnal_conn_decref(conn); - kibnal_peer_decref(peer); - return -EALREADY; - } - - kibnal_peer_decref(peer); - peer = peer2; - } - - /* +1 ref for conn */ - kibnal_peer_addref(peer); - peer->ibp_accepting++; - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - conn->ibc_peer = peer; - conn->ibc_state = IBNAL_CONN_CONNECTING; - conn->ibc_comm_id = cid; - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_version = msg->ibm_version; - - *connp = conn; - return (0); -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_bad_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - CERROR ("Unexpected event %d: conn %p\n", event, arg); - LBUG (); - return TS_IB_CM_CALLBACK_PROCEED; -} - -void -kibnal_abort_txs (kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - 
struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - unsigned long flags; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (tx->tx_passive_rdma || - !tx->tx_passive_rdma_wait); - - LASSERT (tx->tx_passive_rdma_wait || - tx->tx_sending != 0); - } else { - LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_sending == 0); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_passive_rdma_wait = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, &zombies); - } - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - kibnal_txlist_done (&zombies, -ECONNABORTED); -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - kib_conn_t *conn = arg; - int rc; - - /* Established Connection Notifier */ - - switch (event) { - default: - CDEBUG(D_NETERROR, "Connection %p -> %s ERROR %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event); - kibnal_close_conn (conn, -ECONNABORTED); - break; - - case TS_IB_CM_DISCONNECTED: - CDEBUG(D_NETERROR, "Connection %p -> %s DISCONNECTED.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_close_conn (conn, 0); - break; - - case TS_IB_CM_IDLE: - CDEBUG(D_NET, "Connection %p -> %s IDLE.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - /* LASSERT (no further callbacks) */ - rc = tsIbCmCallbackModify(cid, kibnal_bad_conn_callback, conn); - LASSERT (rc == 0); - - /* NB we wait until the connection has closed before - * completing outstanding passive RDMAs so we can be sure - * the network can't touch the mapped memory any more. 
*/ - - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kibnal_abort_txs(conn, &conn->ibc_active_txs); - - kibnal_conn_decref(conn); /* Lose CM's ref */ - break; - } - - return TS_IB_CM_CALLBACK_PROCEED; -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - kib_conn_t *conn = arg; - int rc; - - switch (event) { - default: - if (conn == NULL) { - /* no connection yet */ - CERROR ("Unexpected event: %d\n", event); - return TS_IB_CM_CALLBACK_ABORT; - } - - CERROR ("%s event %p -> %s: %d\n", - (event == TS_IB_CM_IDLE) ? "IDLE" : "Unexpected", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event); - kibnal_connreq_done(conn, 0, -ECONNABORTED); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - - case TS_IB_CM_REQ_RECEIVED: { - struct ib_cm_req_received_param *req = param; - kib_msg_t *msg = req->remote_private_data; - - LASSERT (conn == NULL); - - /* Don't really know srcnid until successful unpack */ - CDEBUG(D_NET, "REQ from ?%s?\n", libcfs_nid2str(msg->ibm_srcnid)); - - rc = kibnal_accept_connreq(&conn, cid, msg, - req->remote_private_data_len); - if (rc != 0) { - CERROR ("Can't accept ?%s?: %d\n", - libcfs_nid2str(msg->ibm_srcnid), rc); - return TS_IB_CM_CALLBACK_ABORT; - } - - /* update 'arg' for next callback */ - rc = tsIbCmCallbackModify(cid, kibnal_passive_conn_callback, conn); - LASSERT (rc == 0); - - msg = req->accept_param.reply_private_data; - kibnal_init_msg(msg, IBNAL_MSG_CONNACK, - sizeof(msg->ibm_u.connparams)); - - msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - - kibnal_pack_msg(msg, conn->ibc_version, 0, - conn->ibc_peer->ibp_nid, - conn->ibc_incarnation); - - req->accept_param.qp = conn->ibc_qp; - req->accept_param.reply_private_data_len = msg->ibm_nob; - req->accept_param.responder_resources = 
IBNAL_RESPONDER_RESOURCES; - req->accept_param.initiator_depth = IBNAL_RESPONDER_RESOURCES; - req->accept_param.rnr_retry_count = IBNAL_RNR_RETRY; - req->accept_param.flow_control = IBNAL_FLOW_CONTROL; - - CDEBUG(D_NET, "Proceeding\n"); - return TS_IB_CM_CALLBACK_PROCEED; /* CM takes my ref on conn */ - } - - case TS_IB_CM_ESTABLISHED: - LASSERT (conn != NULL); - CWARN("Connection %p -> %s ESTABLISHED.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_connreq_done(conn, 0, 0); - return TS_IB_CM_CALLBACK_PROCEED; - } -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_active_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - kib_conn_t *conn = arg; - unsigned long flags; - - switch (event) { - case TS_IB_CM_REP_RECEIVED: { - struct ib_cm_rep_received_param *rep = param; - kib_msg_t *msg = rep->remote_private_data; - int nob = rep->remote_private_data_len; - int rc; - - rc = kibnal_unpack_msg(msg, conn->ibc_version, nob); - if (rc != 0) { - CERROR ("Error %d unpacking conn ack from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - if (msg->ibm_type != IBNAL_MSG_CONNACK) { - CERROR ("Unexpected conn ack type %d from %s\n", - msg->ibm_type, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -EPROTO); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid, - msg->ibm_srcnid) || - !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR("Stale conn ack from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -ESTALE); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - if 
(msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR ("Bad queue depth %d from %s\n", - msg->ibm_u.connparams.ibcp_queue_depth, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -EPROTO); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - CDEBUG(D_NET, "Connection %p -> %s REP_RECEIVED.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - return TS_IB_CM_CALLBACK_PROCEED; - } - - case TS_IB_CM_ESTABLISHED: - CWARN("Connection %p -> %s ESTABLISHED\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_connreq_done(conn, 1, 0); - return TS_IB_CM_CALLBACK_PROCEED; - - case TS_IB_CM_IDLE: - CDEBUG(D_NETERROR, "Connection %p -> %s IDLE\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* I assume this connection attempt was rejected because the - * peer found a stale QP; I'll just try again */ - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - kibnal_schedule_active_connect_locked(conn->ibc_peer); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_connreq_done(conn, 1, -ECONNABORTED); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - - default: - CDEBUG(D_NETERROR, "Connection %p -> %s ERROR %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event); - kibnal_connreq_done(conn, 1, -ECONNABORTED); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } -} - -int -kibnal_pathreq_callback (tTS_IB_CLIENT_QUERY_TID tid, int status, - struct ib_path_record *resp, int remaining, - void *arg) -{ - kib_conn_t *conn = arg; - kib_peer_t *peer = conn->ibc_peer; - kib_msg_t *msg = &conn->ibc_connreq->cr_msg; - - if (status != 0) { - CDEBUG (D_NETERROR, "Pathreq %p -> %s failed: %d\n", - conn, libcfs_nid2str(peer->ibp_nid), status); - kibnal_connreq_done(conn, 1, status); - 
kibnal_conn_decref(conn); /* drop callback's ref */ - return 1; /* non-zero prevents further callbacks */ - } - - conn->ibc_connreq->cr_path = *resp; - - kibnal_init_msg(msg, IBNAL_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); - msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - kibnal_pack_msg(msg, conn->ibc_version, 0, - peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_connreq->cr_connparam = (struct ib_cm_active_param) { - .qp = conn->ibc_qp, - .req_private_data = msg, - .req_private_data_len = msg->ibm_nob, - .responder_resources = IBNAL_RESPONDER_RESOURCES, - .initiator_depth = IBNAL_RESPONDER_RESOURCES, - .retry_count = IBNAL_RETRY, - .rnr_retry_count = IBNAL_RNR_RETRY, - .cm_response_timeout = *kibnal_tunables.kib_timeout, - .max_cm_retries = IBNAL_CM_RETRY, - .flow_control = IBNAL_FLOW_CONTROL, - }; - - /* XXX set timeout just like SDP!!!*/ - conn->ibc_connreq->cr_path.packet_life = 13; - - /* Flag I'm getting involved with the CM... */ - conn->ibc_state = IBNAL_CONN_CONNECTING; - - CDEBUG(D_NET, "Connecting to, service id "LPX64", on %s\n", - conn->ibc_connreq->cr_svcrsp.ibsr_svc_id, - libcfs_nid2str(peer->ibp_nid)); - - /* kibnal_connect_callback gets my conn ref */ - status = ib_cm_connect (&conn->ibc_connreq->cr_connparam, - &conn->ibc_connreq->cr_path, NULL, - conn->ibc_connreq->cr_svcrsp.ibsr_svc_id, 0, - kibnal_active_conn_callback, conn, - &conn->ibc_comm_id); - if (status != 0) { - CERROR ("Connect %p -> %s failed: %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), status); - /* Back out state change: I've not got a CM comm_id yet... 
*/ - conn->ibc_state = IBNAL_CONN_INIT_QP; - kibnal_connreq_done(conn, 1, status); - kibnal_conn_decref(conn); /* Drop callback's ref */ - } - - return 1; /* non-zero to prevent further callbacks */ -} - -void -kibnal_connect_peer (kib_peer_t *peer) -{ - kib_conn_t *conn; - int rc; - - conn = kibnal_create_conn(); - if (conn == NULL) { - CERROR ("Can't allocate conn\n"); - kibnal_peer_connect_failed (peer, 1, -ENOMEM); - return; - } - - conn->ibc_peer = peer; - kibnal_peer_addref(peer); - - LIBCFS_ALLOC (conn->ibc_connreq, sizeof (*conn->ibc_connreq)); - if (conn->ibc_connreq == NULL) { - CERROR ("Can't allocate connreq\n"); - kibnal_connreq_done(conn, 1, -ENOMEM); - kibnal_conn_decref(conn); /* drop my ref */ - return; - } - - memset(conn->ibc_connreq, 0, sizeof (*conn->ibc_connreq)); - - rc = kibnal_make_svcqry(conn); - if (rc != 0) { - kibnal_connreq_done (conn, 1, rc); - kibnal_conn_decref(conn); /* drop my ref */ - return; - } - - rc = ib_cached_gid_get(kibnal_data.kib_device, - kibnal_data.kib_port, 0, - conn->ibc_connreq->cr_gid); - LASSERT (rc == 0); - - /* kibnal_pathreq_callback gets my conn ref */ - rc = tsIbPathRecordRequest (kibnal_data.kib_device, - kibnal_data.kib_port, - conn->ibc_connreq->cr_gid, - conn->ibc_connreq->cr_svcrsp.ibsr_svc_gid, - conn->ibc_connreq->cr_svcrsp.ibsr_svc_pkey, - 0, - *kibnal_tunables.kib_timeout * HZ, - 0, - kibnal_pathreq_callback, conn, - &conn->ibc_connreq->cr_tid); - if (rc == 0) - return; /* callback now has my ref on conn */ - - CERROR ("Path record request %p -> %s failed: %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_connreq_done(conn, 1, rc); - kibnal_conn_decref(conn); /* drop my ref */ -} - -int -kibnal_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - unsigned long flags; - int timed_out = 0; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs == 
&conn->ibc_active_txs) { - LASSERT (tx->tx_passive_rdma || - !tx->tx_passive_rdma_wait); - - LASSERT (tx->tx_passive_rdma_wait || - tx->tx_sending != 0); - } else { - LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_sending == 0); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - break; - } - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - return timed_out; -} - -int -kibnal_conn_timed_out (kib_conn_t *conn) -{ - return kibnal_check_txs(conn, &conn->ibc_tx_queue) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kibnal_check_txs(conn, &conn->ibc_active_txs); -} - -void -kibnal_check_conns (int idx) -{ - struct list_head *peers = &kibnal_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... */ - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... 
*/ - kibnal_check_sends(conn); - - if (!kibnal_conn_timed_out(conn)) - continue; - - kibnal_conn_addref(conn); - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_close_conn (conn, -ETIMEDOUT); - kibnal_conn_decref(conn); - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_terminate_conn (kib_conn_t *conn) -{ - int rc; - - CDEBUG(D_NET, "conn %p\n", conn); - LASSERT (conn->ibc_state == IBNAL_CONN_DEATHROW); - conn->ibc_state = IBNAL_CONN_ZOMBIE; - - rc = ib_cm_disconnect (conn->ibc_comm_id); - if (rc != 0) - CERROR ("Error %d disconnecting conn %p -> %s\n", - rc, conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_peer_notify(conn->ibc_peer); -} - -int -kibnal_reaper (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_conn_t *conn; - int timeout; - int i; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kibnal_reaper"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - - spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags); - - while (!kibnal_data.kib_shutdown) { - if (!list_empty (&kibnal_data.kib_reaper_conns)) { - conn = list_entry (kibnal_data.kib_reaper_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags); - - switch (conn->ibc_state) { - case IBNAL_CONN_DEATHROW: - LASSERT (conn->ibc_comm_id != TS_IB_CM_COMM_ID_INVALID); - /* Disconnect: conn becomes a zombie in the - * callback and last ref reschedules it - * here... 
*/ - kibnal_terminate_conn(conn); - kibnal_conn_decref(conn); - break; - - case IBNAL_CONN_INIT_QP: - case IBNAL_CONN_ZOMBIE: - kibnal_destroy_conn (conn); - break; - - default: - CERROR ("Bad conn %p state: %d\n", - conn, conn->ibc_state); - LBUG(); - } - - spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags); - continue; - } - - spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags); - - /* careful with the jiffy wrap... */ - while ((timeout = (int)(deadline - jiffies)) <= 0) { - const int n = 4; - const int p = 1; - int chunk = kibnal_data.kib_peer_hash_size; - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. */ - - if (*kibnal_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kibnal_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kibnal_check_conns (peer_index); - peer_index = (peer_index + 1) % - kibnal_data.kib_peer_hash_size; - } - - deadline += p * HZ; - } - - kibnal_data.kib_reaper_waketime = jiffies + timeout; - - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kibnal_data.kib_reaper_waitq, &wait); - - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_reaper_waitq, &wait); - - spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags); - - kibnal_thread_fini (); - return (0); -} - -int -kibnal_connd (void *arg) -{ - long id = (long)arg; - char name[16]; - wait_queue_t wait; - unsigned long flags; - kib_peer_t *peer; - kib_acceptsock_t *as; - int did_something; - - snprintf(name, sizeof(name), "kibnal_connd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry (&wait, current); - - 
spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - while (!kibnal_data.kib_shutdown) { - did_something = 0; - - if (!list_empty (&kibnal_data.kib_connd_acceptq)) { - as = list_entry (kibnal_data.kib_connd_acceptq.next, - kib_acceptsock_t, ibas_list); - list_del (&as->ibas_list); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_handle_svcqry(as->ibas_sock); - kibnal_free_acceptsock(as); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - did_something = 1; - } - - /* Only handle an outgoing connection request if there is someone left - * to handle an incoming svcqry */ - if (!list_empty (&kibnal_data.kib_connd_peers) && - ((kibnal_data.kib_connd_connecting + 1) < - *kibnal_tunables.kib_n_connd)) { - peer = list_entry (kibnal_data.kib_connd_peers.next, - kib_peer_t, ibp_connd_list); - - list_del_init (&peer->ibp_connd_list); - kibnal_data.kib_connd_connecting++; - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_connect_peer (peer); - kibnal_peer_decref(peer); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - kibnal_data.kib_connd_connecting--; - } - - if (did_something) - continue; - - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kibnal_data.kib_connd_waitq, &wait); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - schedule(); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_thread_fini (); - return (0); -} - -int -kibnal_scheduler(void *arg) -{ - long id = (long)arg; - char name[16]; - kib_rx_t *rx; - kib_tx_t *tx; - unsigned long flags; - int rc; - int counter = 0; - int did_something; - - snprintf(name, sizeof(name), "kibnal_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, 
flags); - - while (!kibnal_data.kib_shutdown) { - did_something = 0; - - while (!list_empty(&kibnal_data.kib_sched_txq)) { - tx = list_entry(kibnal_data.kib_sched_txq.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - kibnal_tx_done(tx); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, - flags); - } - - if (!list_empty(&kibnal_data.kib_sched_rxq)) { - rx = list_entry(kibnal_data.kib_sched_rxq.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - kibnal_rx(rx); - - did_something = 1; - spin_lock_irqsave(&kibnal_data.kib_sched_lock, - flags); - } - - /* nothing to do or hogging CPU */ - if (!did_something || counter++ == IBNAL_RESCHED) { - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - counter = 0; - - if (!did_something) { - rc = wait_event_interruptible_exclusive( - kibnal_data.kib_sched_waitq, - !list_empty(&kibnal_data.kib_sched_txq) || - !list_empty(&kibnal_data.kib_sched_rxq) || - kibnal_data.kib_shutdown); - } else { - our_cond_resched(); - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, - flags); - } - } - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - - kibnal_thread_fini(); - return (0); -} diff --git a/lnet/klnds/openiblnd/openiblnd_modparams.c b/lnet/klnds/openiblnd/openiblnd_modparams.c deleted file mode 100644 index 027b716fadfa51de36f3245530e733763367b47c..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd_modparams.c +++ /dev/null @@ -1,222 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "openiblnd.h" - -static char *ipif_basename = "ib"; -CFS_MODULE_PARM(ipif_basename, "s", charp, 0444, - "IPoIB interface base name"); - -static int n_connd = 4; -CFS_MODULE_PARM(n_connd, "i", int, 0444, - "# of connection daemons"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int concurrent_peers = 1152; -CFS_MODULE_PARM(concurrent_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 384; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 256; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 16; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in seconds before sending a keepalive"); - -kib_tunables_t 
kibnal_tunables = { - .kib_ipif_basename = &ipif_basename, - .kib_n_connd = &n_connd, - .kib_min_reconnect_interval = &min_reconnect_interval, - .kib_max_reconnect_interval = &max_reconnect_interval, - .kib_concurrent_peers = &concurrent_peers, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_keepalive = &keepalive, -}; - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - -static cfs_sysctl_table_t kibnal_ctl_table[] = { - { - .ctl_name = 1, - .procname = "ipif_basename", - .data = &ipif_basename, - .maxlen = 1024, - .mode = 0444, - .proc_handler = &proc_dostring - }, - { - .ctl_name = 2, - .procname = "n_connd", - .data = &n_connd, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 3, - .procname = "min_reconnect_interval", - .data = &min_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "max_reconnect_interval", - .data = &max_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 5, - .procname = "concurrent_peers", - .data = &concurrent_peers, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "cksum", - .data = &cksum, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "timeout", - .data = &timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 8, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 9, - .procname = "credits", - .data = &credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 10, - .procname = "peer_credits", - .data = &peer_credits, - .maxlen = sizeof(int), - 
.mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 11, - .procname = "keepalive", - .data = &keepalive, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - {0} -}; - -static cfs_sysctl_table_t kibnal_top_ctl_table[] = { - { - .ctl_name = 203, - .procname = "openibnal", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kibnal_ctl_table - }, - {0} -}; - -int -kibnal_tunables_init () -{ - kibnal_tunables.kib_sysctl = - cfs_register_sysctl_table(kibnal_top_ctl_table, 0); - - if (kibnal_tunables.kib_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kibnal_tunables_fini () -{ - if (kibnal_tunables.kib_sysctl != NULL) - cfs_unregister_sysctl_table(kibnal_tunables.kib_sysctl); -} - -#else - -int -kibnal_tunables_init () -{ - return 0; -} - -void -kibnal_tunables_fini () -{ -} - -#endif diff --git a/lnet/klnds/ptllnd/.cvsignore b/lnet/klnds/ptllnd/.cvsignore deleted file mode 100644 index 0586565dc9fc3efe66aef7b2989e5f29b8823c37..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.*.cmd -.tmp_versions -.depend -wirecheck diff --git a/lnet/klnds/ptllnd/Makefile.in b/lnet/klnds/ptllnd/Makefile.in deleted file mode 100755 index ec2f9bb1544ba61171435411bdd9df47c459134d..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/Makefile.in +++ /dev/null @@ -1,13 +0,0 @@ -MODULES := kptllnd - -EXTRA_POST_CFLAGS := @PTLLNDCPPFLAGS@ - -kptllnd-objs := ptllnd.o \ - ptllnd_cb.o \ - ptllnd_modparams.o \ - ptllnd_peer.o \ - ptllnd_rx_buf.o \ - ptllnd_tx.o \ - ptllnd_ptltrace.o - -@INCLUDE_RULES@ diff --git a/lnet/klnds/ptllnd/README b/lnet/klnds/ptllnd/README deleted file mode 100644 index 5cb6cfcafe7455c9f722b87b2ad7f637362e2054..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/README +++ /dev/null @@ -1,47 +0,0 @@ -1. 
This version of the Portals LND is intended to work on the Cray XT3 using - Cray Portals as a network transport. - -2. To enable the building of the Portals LND (ptllnd.ko) configure with the - following option: - ./configure --with-portals=<path-to-portals-headers> - -3. The following configuration options are supported - - ntx: - The total number of message descritprs - - concurrent_peers: - The maximum number of conncurent peers. Peers attemting - to connect beyond the maximum will not be allowd. - - peer_hash_table_size: - The number of hash table slots for the peers. This number - should scale with concurrent_peers. - - cksum: - Set to non-zero to enable message (not RDMA) checksums for - outgoing packets. Incoming packets will always be checksumed - if necssary, independnt of this value. - - timeout: - The amount of time a request can linger in a peers active - queue, before the peer is considered dead. Units: seconds. - - portal: - The portal ID to use for the ptllnd traffic. - - rxb_npages: - The number of pages in a RX Buffer. - - credits: - The maximum total number of concurrent sends that are - outstanding at any given instant. - - peercredits: - The maximum number of concurrent sends that are - outstanding to a single piere at any given instant. - - max_msg_size: - The maximum immedate message size. This MUST be - the same on all nodes in a cluster. A peer connecting - with a diffrent max_msg_size will be rejected. 
diff --git a/lnet/klnds/ptllnd/autoMakefile.am b/lnet/klnds/ptllnd/autoMakefile.am deleted file mode 100755 index bd8cc9c81740cb8310c6846267271a7c619b909d..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/autoMakefile.am +++ /dev/null @@ -1,8 +0,0 @@ -if MODULES -if BUILD_PTLLND -modulenet_DATA = kptllnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kptllnd-objs:%.o=%.c) ptllnd.h diff --git a/lnet/klnds/ptllnd/ptllnd.c b/lnet/klnds/ptllnd/ptllnd.c deleted file mode 100755 index 0a556877ca4b0475978fe0aec32a97be4f75cf01..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd.c +++ /dev/null @@ -1,895 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - -#include "ptllnd.h" - -lnd_t kptllnd_lnd = { - .lnd_type = PTLLND, - .lnd_startup = kptllnd_startup, - .lnd_shutdown = kptllnd_shutdown, - .lnd_ctl = kptllnd_ctl, - .lnd_send = kptllnd_send, - .lnd_recv = kptllnd_recv, - .lnd_eager_recv = kptllnd_eager_recv, -}; - -kptl_data_t kptllnd_data; - -char * -kptllnd_ptlid2str(ptl_process_id_t id) -{ - static char strs[64][32]; - static int idx = 0; - - unsigned long flags; - char *str; - - spin_lock_irqsave(&kptllnd_data.kptl_ptlid2str_lock, flags); - str = strs[idx++]; - if (idx >= sizeof(strs)/sizeof(strs[0])) - idx = 0; - spin_unlock_irqrestore(&kptllnd_data.kptl_ptlid2str_lock, flags); - - snprintf(str, sizeof(strs[0]), FMT_PTLID, id.pid, id.nid); - return str; -} - -void -kptllnd_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU - * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */ - - - /* Constants... 
*/ - CLASSERT (PTL_RESERVED_MATCHBITS == 0x100); - CLASSERT (LNET_MSG_MATCHBITS == 0); - CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E); - CLASSERT (PTLLND_MSG_VERSION == 0x04); - CLASSERT (PTLLND_RDMA_OK == 0x00); - CLASSERT (PTLLND_RDMA_FAIL == 0x01); - CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00); - CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01); - CLASSERT (PTLLND_MSG_TYPE_GET == 0x02); - CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03); - CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04); - CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05); - CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06); - - /* Checks for struct kptl_msg_t */ - CLASSERT ((int)sizeof(kptl_msg_t) == 136); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4); - CLASSERT 
((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12); - - /* Checks for struct kptl_immediate_msg_t */ - CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1); - - /* Checks for struct kptl_rdma_msg_t */ - CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8); - - /* Checks for struct kptl_hello_msg_t */ - CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4); -} - -const char *kptllnd_evtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_EVENT_GET_START); - DO_TYPE(PTL_EVENT_GET_END); - DO_TYPE(PTL_EVENT_PUT_START); - DO_TYPE(PTL_EVENT_PUT_END); - DO_TYPE(PTL_EVENT_REPLY_START); - DO_TYPE(PTL_EVENT_REPLY_END); - DO_TYPE(PTL_EVENT_ACK); - DO_TYPE(PTL_EVENT_SEND_START); 
- DO_TYPE(PTL_EVENT_SEND_END); - DO_TYPE(PTL_EVENT_UNLINK); - default: - return "<unknown event type>"; - } -#undef DO_TYPE -} - -const char *kptllnd_msgtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTLLND_MSG_TYPE_INVALID); - DO_TYPE(PTLLND_MSG_TYPE_PUT); - DO_TYPE(PTLLND_MSG_TYPE_GET); - DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE); - DO_TYPE(PTLLND_MSG_TYPE_HELLO); - DO_TYPE(PTLLND_MSG_TYPE_NOOP); - DO_TYPE(PTLLND_MSG_TYPE_NAK); - default: - return "<unknown msg type>"; - } -#undef DO_TYPE -} - -const char *kptllnd_errtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_OK); - DO_TYPE(PTL_SEGV); - DO_TYPE(PTL_NO_SPACE); - DO_TYPE(PTL_ME_IN_USE); - DO_TYPE(PTL_NAL_FAILED); - DO_TYPE(PTL_NO_INIT); - DO_TYPE(PTL_IFACE_DUP); - DO_TYPE(PTL_IFACE_INVALID); - DO_TYPE(PTL_HANDLE_INVALID); - DO_TYPE(PTL_MD_INVALID); - DO_TYPE(PTL_ME_INVALID); - DO_TYPE(PTL_PROCESS_INVALID); - DO_TYPE(PTL_PT_INDEX_INVALID); - DO_TYPE(PTL_SR_INDEX_INVALID); - DO_TYPE(PTL_EQ_INVALID); - DO_TYPE(PTL_EQ_DROPPED); - DO_TYPE(PTL_EQ_EMPTY); - DO_TYPE(PTL_MD_NO_UPDATE); - DO_TYPE(PTL_FAIL); - DO_TYPE(PTL_AC_INDEX_INVALID); - DO_TYPE(PTL_MD_ILLEGAL); - DO_TYPE(PTL_ME_LIST_TOO_LONG); - DO_TYPE(PTL_MD_IN_USE); - DO_TYPE(PTL_NI_INVALID); - DO_TYPE(PTL_PID_INVALID); - DO_TYPE(PTL_PT_FULL); - DO_TYPE(PTL_VAL_FAILED); - DO_TYPE(PTL_NOT_IMPLEMENTED); - DO_TYPE(PTL_NO_ACK); - DO_TYPE(PTL_EQ_IN_USE); - DO_TYPE(PTL_PID_IN_USE); - DO_TYPE(PTL_INV_EQ_SIZE); - DO_TYPE(PTL_AGAIN); - default: - return "<unknown event type>"; - } -#undef DO_TYPE -} - -__u32 -kptllnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -void -kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob) -{ - msg->ptlm_type = type; - msg->ptlm_nob = (offsetof(kptl_msg_t, ptlm_u) + body_nob + 7) & ~7; - - LASSERT(msg->ptlm_nob <= *kptllnd_tunables.kptl_max_msg_size); -} - -void -kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer) -{ - msg->ptlm_magic = PTLLND_MSG_MAGIC; - msg->ptlm_version = PTLLND_MSG_VERSION; - /* msg->ptlm_type Filled in kptllnd_init_msg() */ - msg->ptlm_credits = peer->peer_outstanding_credits; - /* msg->ptlm_nob Filled in kptllnd_init_msg() */ - msg->ptlm_cksum = 0; - msg->ptlm_srcnid = kptllnd_data.kptl_ni->ni_nid; - msg->ptlm_srcstamp = peer->peer_myincarnation; - msg->ptlm_dstnid = peer->peer_id.nid; - msg->ptlm_dststamp = peer->peer_incarnation; - msg->ptlm_srcpid = the_lnet.ln_pid; - msg->ptlm_dstpid = peer->peer_id.pid; - - if (*kptllnd_tunables.kptl_checksum) { - /* NB ptlm_cksum zero while computing cksum */ - msg->ptlm_cksum = kptllnd_cksum(msg, - offsetof(kptl_msg_t, ptlm_u)); - } -} - -int -kptllnd_msg_unpack(kptl_msg_t *msg, int nob) -{ - const int hdr_size = offsetof(kptl_msg_t, ptlm_u); - __u32 msg_cksum; - __u16 msg_version; - int flip; - - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Very Short message: %d\n", nob); - return -EPROTO; - } - - /* - * Determine if we need to flip - */ - if (msg->ptlm_magic == PTLLND_MSG_MAGIC) { - flip = 0; - } else if (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ptlm_magic); - return -EPROTO; - } - - msg_version = flip ? 
__swab16(msg->ptlm_version) : msg->ptlm_version; - - if (msg_version != PTLLND_MSG_VERSION) { - CERROR("Bad version: got %04x expected %04x\n", - (__u32)msg_version, PTLLND_MSG_VERSION); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: got %d, wanted at least %d\n", - nob, hdr_size); - return -EPROTO; - } - - /* checksum must be computed with - * 1) ptlm_cksum zero and - * 2) BEFORE anything gets modified/flipped - */ - msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum; - msg->ptlm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kptllnd_cksum(msg, hdr_size)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - - msg->ptlm_version = msg_version; - msg->ptlm_cksum = msg_cksum; - - if (flip) { - /* These two are 1 byte long so we don't swap them - But check this assumtion*/ - CLASSERT (sizeof(msg->ptlm_type) == 1); - CLASSERT (sizeof(msg->ptlm_credits) == 1); - /* src & dst stamps are opaque cookies */ - __swab32s(&msg->ptlm_nob); - __swab64s(&msg->ptlm_srcnid); - __swab64s(&msg->ptlm_dstnid); - __swab32s(&msg->ptlm_srcpid); - __swab32s(&msg->ptlm_dstpid); - } - - if (msg->ptlm_nob != nob) { - CERROR("msg_nob corrupt: got 0x%08x, wanted %08x\n", - msg->ptlm_nob, nob); - return -EPROTO; - } - - switch(msg->ptlm_type) - { - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - if (nob < hdr_size + sizeof(kptl_rdma_msg_t)) { - CERROR("Short rdma request: got %d, want %d\n", - nob, hdr_size + (int)sizeof(kptl_rdma_msg_t)); - return -EPROTO; - } - - if (flip) - __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits); - - if (msg->ptlm_u.rdma.kptlrm_matchbits < PTL_RESERVED_MATCHBITS) { - CERROR("Bad matchbits "LPX64"\n", - msg->ptlm_u.rdma.kptlrm_matchbits); - return -EPROTO; - } - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - if (nob < offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)) { - CERROR("Short immediate: got %d, want %d\n", nob, - (int)offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)); - return -EPROTO; - } - /* Do 
nothing */ - break; - - case PTLLND_MSG_TYPE_NOOP: - case PTLLND_MSG_TYPE_NAK: - /* Do nothing */ - break; - - case PTLLND_MSG_TYPE_HELLO: - if (nob < hdr_size + sizeof(kptl_hello_msg_t)) { - CERROR("Short hello: got %d want %d\n", - nob, hdr_size + (int)sizeof(kptl_hello_msg_t)); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits); - __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size); - } - break; - - default: - CERROR("Bad message type: 0x%02x\n", (__u32)msg->ptlm_type); - return -EPROTO; - } - - return 0; -} - -int -kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - CDEBUG(D_NET, ">>> kptllnd_ctl cmd=%u arg=%p\n", cmd, arg); - - /* - * Validate that the context block is actually - * pointing to this interface - */ - LASSERT (ni == kptllnd_data.kptl_ni); - - switch(cmd) { - case IOC_LIBCFS_DEL_PEER: { - lnet_process_id_t id; - - id.nid = data->ioc_nid; - id.pid = data->ioc_u32[1]; - - rc = kptllnd_peer_del(id); - break; - } - - case IOC_LIBCFS_GET_PEER: { - lnet_process_id_t id = {.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}; - __u64 incarnation = 0; - __u64 next_matchbits = 0; - __u64 last_matchbits_seen = 0; - int state = 0; - int sent_hello = 0; - int refcount = 0; - int nsendq = 0; - int nactiveq = 0; - int credits = 0; - int outstanding_credits = 0; - - rc = kptllnd_get_peer_info(data->ioc_count, &id, - &state, &sent_hello, - &refcount, &incarnation, - &next_matchbits, &last_matchbits_seen, - &nsendq, &nactiveq, - &credits, &outstanding_credits); - /* wince... 
*/ - data->ioc_nid = id.nid; - data->ioc_net = state; - data->ioc_flags = sent_hello; - data->ioc_count = refcount; - data->ioc_u64[0] = incarnation; - data->ioc_u32[0] = (__u32)next_matchbits; - data->ioc_u32[1] = (__u32)(next_matchbits >> 32); - data->ioc_u32[2] = (__u32)last_matchbits_seen; - data->ioc_u32[3] = (__u32)(last_matchbits_seen >> 32); - data->ioc_u32[4] = id.pid; - data->ioc_u32[5] = (nsendq << 16) | nactiveq; - data->ioc_u32[6] = (credits << 16) | outstanding_credits; - break; - } - - default: - rc=-EINVAL; - break; - } - CDEBUG(D_NET, "<<< kptllnd_ctl rc=%d\n", rc); - return rc; -} - -int -kptllnd_startup (lnet_ni_t *ni) -{ - int rc; - int i; - int spares; - struct timeval tv; - ptl_err_t ptl_rc; - - LASSERT (ni->ni_lnd == &kptllnd_lnd); - - if (kptllnd_data.kptl_init != PTLLND_INIT_NOTHING) { - CERROR("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kptllnd_tunables.kptl_max_procs_per_node < 1) { - CERROR("max_procs_per_node must be >= 1\n"); - return -EINVAL; - } - - /* kptl_msg_t::ptlm_credits is only a __u8 */ - if (*kptllnd_tunables.kptl_peercredits > 255) { - CERROR("kptl_peercredits must be <= 255\n"); - return -EINVAL; - } - - *kptllnd_tunables.kptl_max_msg_size &= ~7; - if (*kptllnd_tunables.kptl_max_msg_size < PTLLND_MIN_BUFFER_SIZE) - *kptllnd_tunables.kptl_max_msg_size = PTLLND_MIN_BUFFER_SIZE; - - CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0); - CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE); - - /* - * zero pointers, flags etc - * put everything into a known state. 
- */ - memset (&kptllnd_data, 0, sizeof (kptllnd_data)); - kptllnd_data.kptl_eqh = PTL_INVALID_HANDLE; - kptllnd_data.kptl_nih = PTL_INVALID_HANDLE; - - /* - * Setup the sched locks/lists/waitq - */ - spin_lock_init(&kptllnd_data.kptl_sched_lock); - init_waitqueue_head(&kptllnd_data.kptl_sched_waitq); - INIT_LIST_HEAD(&kptllnd_data.kptl_sched_txq); - INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxq); - INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxbq); - - /* init kptl_ptlid2str_lock before any call to kptllnd_ptlid2str */ - spin_lock_init(&kptllnd_data.kptl_ptlid2str_lock); - - /* - * Setup the tx locks/lists - */ - spin_lock_init(&kptllnd_data.kptl_tx_lock); - INIT_LIST_HEAD(&kptllnd_data.kptl_idle_txs); - atomic_set(&kptllnd_data.kptl_ntx, 0); - - /* - * Uptick the module reference count - */ - PORTAL_MODULE_USE; - - /* - * Setup pointers between the ni and context data block - */ - kptllnd_data.kptl_ni = ni; - ni->ni_data = &kptllnd_data; - - /* - * Setup Credits - */ - ni->ni_maxtxcredits = *kptllnd_tunables.kptl_credits; - ni->ni_peertxcredits = *kptllnd_tunables.kptl_peercredits; - - kptllnd_data.kptl_expected_peers = - *kptllnd_tunables.kptl_max_nodes * - *kptllnd_tunables.kptl_max_procs_per_node; - - /* - * Initialize the Network interface instance - * We use the default because we don't have any - * way to choose a better interface. - * Requested and actual limits are ignored. - */ - ptl_rc = PtlNIInit( -#ifdef _USING_LUSTRE_PORTALS_ - PTL_IFACE_DEFAULT, -#else - CRAY_KERN_NAL, -#endif - *kptllnd_tunables.kptl_pid, NULL, NULL, - &kptllnd_data.kptl_nih); - - /* - * Note: PTL_IFACE_DUP simply means that the requested - * interface was already inited and that we're sharing it. - * Which is ok. 
- */ - if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) { - CERROR ("PtlNIInit: error %s(%d)\n", - kptllnd_errtype2str(ptl_rc), ptl_rc); - rc = -EINVAL; - goto failed; - } - - /* NB eq size irrelevant if using a callback */ - ptl_rc = PtlEQAlloc(kptllnd_data.kptl_nih, - 8, /* size */ - kptllnd_eq_callback, /* handler callback */ - &kptllnd_data.kptl_eqh); /* output handle */ - if (ptl_rc != PTL_OK) { - CERROR("PtlEQAlloc failed %s(%d)\n", - kptllnd_errtype2str(ptl_rc), ptl_rc); - rc = -ENOMEM; - goto failed; - } - - /* - * Fetch the lower NID - */ - ptl_rc = PtlGetId(kptllnd_data.kptl_nih, - &kptllnd_data.kptl_portals_id); - if (ptl_rc != PTL_OK) { - CERROR ("PtlGetID: error %s(%d)\n", - kptllnd_errtype2str(ptl_rc), ptl_rc); - rc = -EINVAL; - goto failed; - } - - if (kptllnd_data.kptl_portals_id.pid != *kptllnd_tunables.kptl_pid) { - /* The kernel ptllnd must have the expected PID */ - CERROR("Unexpected PID: %u (%u expected)\n", - kptllnd_data.kptl_portals_id.pid, - *kptllnd_tunables.kptl_pid); - rc = -EINVAL; - goto failed; - } - - ni->ni_nid = kptllnd_ptl2lnetnid(kptllnd_data.kptl_portals_id.nid); - - CDEBUG(D_NET, "ptl id=%s, lnet id=%s\n", - kptllnd_ptlid2str(kptllnd_data.kptl_portals_id), - libcfs_nid2str(ni->ni_nid)); - - /* Initialized the incarnation - it must be for-all-time unique, even - * accounting for the fact that we increment it when we disconnect a - * peer that's using it */ - do_gettimeofday(&tv); - kptllnd_data.kptl_incarnation = (((__u64)tv.tv_sec) * 1000000) + - tv.tv_usec; - CDEBUG(D_NET, "Incarnation="LPX64"\n", kptllnd_data.kptl_incarnation); - - /* - * Allocate and setup the peer hash table - */ - rwlock_init(&kptllnd_data.kptl_peer_rw_lock); - init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq); - INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers); - INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers); - - kptllnd_data.kptl_peer_hash_size = - *kptllnd_tunables.kptl_peer_hash_table_size; - LIBCFS_ALLOC(kptllnd_data.kptl_peers, - 
(kptllnd_data.kptl_peer_hash_size * - sizeof(struct list_head))); - if (kptllnd_data.kptl_peers == NULL) { - CERROR("Failed to allocate space for peer hash table size=%d\n", - kptllnd_data.kptl_peer_hash_size); - rc = -ENOMEM; - goto failed; - } - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) - INIT_LIST_HEAD(&kptllnd_data.kptl_peers[i]); - - LIBCFS_ALLOC(kptllnd_data.kptl_nak_msg, offsetof(kptl_msg_t, ptlm_u)); - if (kptllnd_data.kptl_nak_msg == NULL) { - CERROR("Can't allocate NAK msg\n"); - rc = -ENOMEM; - goto failed; - } - memset(kptllnd_data.kptl_nak_msg, 0, offsetof(kptl_msg_t, ptlm_u)); - kptllnd_init_msg(kptllnd_data.kptl_nak_msg, PTLLND_MSG_TYPE_NAK, 0); - kptllnd_data.kptl_nak_msg->ptlm_magic = PTLLND_MSG_MAGIC; - kptllnd_data.kptl_nak_msg->ptlm_version = PTLLND_MSG_VERSION; - kptllnd_data.kptl_nak_msg->ptlm_srcpid = the_lnet.ln_pid; - kptllnd_data.kptl_nak_msg->ptlm_srcnid = ni->ni_nid; - kptllnd_data.kptl_nak_msg->ptlm_srcstamp = kptllnd_data.kptl_incarnation; - kptllnd_data.kptl_nak_msg->ptlm_dstpid = LNET_PID_ANY; - kptllnd_data.kptl_nak_msg->ptlm_dstnid = LNET_NID_ANY; - - kptllnd_rx_buffer_pool_init(&kptllnd_data.kptl_rx_buffer_pool); - - kptllnd_data.kptl_rx_cache = - cfs_mem_cache_create("ptllnd_rx", - sizeof(kptl_rx_t) + - *kptllnd_tunables.kptl_max_msg_size, - 0, /* offset */ - 0); /* flags */ - if (kptllnd_data.kptl_rx_cache == NULL) { - CERROR("Can't create slab for RX descriptors\n"); - rc = -ENOMEM; - goto failed; - } - - /* lists/ptrs/locks initialised */ - kptllnd_data.kptl_init = PTLLND_INIT_DATA; - - /*****************************************************/ - - rc = kptllnd_setup_tx_descs(); - if (rc != 0) { - CERROR("Can't pre-allocate %d TX descriptors: %d\n", - *kptllnd_tunables.kptl_ntx, rc); - goto failed; - } - - /* Start the scheduler threads for handling incoming requests. 
No need - * to advance the state because this will be automatically cleaned up - * now that PTLNAT_INIT_DATA state has been entered */ - CDEBUG(D_NET, "starting %d scheduler threads\n", PTLLND_N_SCHED); - for (i = 0; i < PTLLND_N_SCHED; i++) { - rc = kptllnd_thread_start(kptllnd_scheduler, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn scheduler[%d]: %d\n", i, rc); - goto failed; - } - } - - rc = kptllnd_thread_start(kptllnd_watchdog, NULL); - if (rc != 0) { - CERROR("Can't spawn watchdog: %d\n", rc); - goto failed; - } - - /* Ensure that 'rxb_nspare' buffers can be off the net (being emptied) - * and we will still have enough buffers posted for all our peers */ - spares = *kptllnd_tunables.kptl_rxb_nspare * - ((*kptllnd_tunables.kptl_rxb_npages * PAGE_SIZE)/ - *kptllnd_tunables.kptl_max_msg_size); - - /* reserve and post the buffers */ - rc = kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool, - kptllnd_data.kptl_expected_peers + - spares); - if (rc != 0) { - CERROR("Can't reserve RX Buffer pool: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kptllnd_data.kptl_init = PTLLND_INIT_ALL; - - /*****************************************************/ - - if (*kptllnd_tunables.kptl_checksum) - CWARN("Checksumming enabled\n"); - - CDEBUG(D_NET, "<<< kptllnd_startup SUCCESS\n"); - return 0; - - failed: - CDEBUG(D_NET, "kptllnd_startup failed rc=%d\n", rc); - kptllnd_shutdown(ni); - return rc; -} - -void -kptllnd_shutdown (lnet_ni_t *ni) -{ - int i; - ptl_err_t prc; - lnet_process_id_t process_id; - unsigned long flags; - - CDEBUG(D_MALLOC, "before LND cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - LASSERT (ni == kptllnd_data.kptl_ni); - - switch (kptllnd_data.kptl_init) { - default: - LBUG(); - - case PTLLND_INIT_ALL: - case PTLLND_INIT_DATA: - /* Stop receiving */ - kptllnd_rx_buffer_pool_fini(&kptllnd_data.kptl_rx_buffer_pool); - LASSERT (list_empty(&kptllnd_data.kptl_sched_rxq)); - LASSERT 
(list_empty(&kptllnd_data.kptl_sched_rxbq)); - - /* Hold peertable lock to interleave cleanly with peer birth/death */ - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - LASSERT (kptllnd_data.kptl_shutdown == 0); - kptllnd_data.kptl_shutdown = 1; /* phase 1 == destroy peers */ - - /* no new peers possible now */ - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - /* nuke all existing peers */ - process_id.nid = LNET_NID_ANY; - process_id.pid = LNET_PID_ANY; - kptllnd_peer_del(process_id); - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - LASSERT (kptllnd_data.kptl_n_active_peers == 0); - - i = 2; - while (kptllnd_data.kptl_npeers != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "Waiting for %d peers to terminate\n", - kptllnd_data.kptl_npeers); - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - cfs_pause(cfs_time_seconds(1)); - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, - flags); - } - - LASSERT(list_empty(&kptllnd_data.kptl_closing_peers)); - LASSERT(list_empty(&kptllnd_data.kptl_zombie_peers)); - LASSERT (kptllnd_data.kptl_peers != NULL); - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) - LASSERT (list_empty (&kptllnd_data.kptl_peers[i])); - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - CDEBUG(D_NET, "All peers deleted\n"); - - /* Shutdown phase 2: kill the daemons... */ - kptllnd_data.kptl_shutdown = 2; - mb(); - - i = 2; - while (atomic_read (&kptllnd_data.kptl_nthreads) != 0) { - /* Wake up all threads*/ - wake_up_all(&kptllnd_data.kptl_sched_waitq); - wake_up_all(&kptllnd_data.kptl_watchdog_waitq); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d threads to terminate\n", - atomic_read(&kptllnd_data.kptl_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - CDEBUG(D_NET, "All Threads stopped\n"); - LASSERT(list_empty(&kptllnd_data.kptl_sched_txq)); - - kptllnd_cleanup_tx_descs(); - - /* Nothing here now, but libcfs might soon require - * us to explicitly destroy wait queues and semaphores - * that would be done here */ - - /* fall through */ - - case PTLLND_INIT_NOTHING: - CDEBUG(D_NET, "PTLLND_INIT_NOTHING\n"); - break; - } - - if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) { - prc = PtlEQFree(kptllnd_data.kptl_eqh); - if (prc != PTL_OK) - CERROR("Error %s(%d) freeing portals EQ\n", - kptllnd_errtype2str(prc), prc); - } - - if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) { - prc = PtlNIFini(kptllnd_data.kptl_nih); - if (prc != PTL_OK) - CERROR("Error %s(%d) finalizing portals NI\n", - kptllnd_errtype2str(prc), prc); - } - - LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0); - LASSERT (list_empty(&kptllnd_data.kptl_idle_txs)); - - if (kptllnd_data.kptl_rx_cache != NULL) - cfs_mem_cache_destroy(kptllnd_data.kptl_rx_cache); - - if (kptllnd_data.kptl_peers != NULL) - LIBCFS_FREE (kptllnd_data.kptl_peers, - sizeof (struct list_head) * - kptllnd_data.kptl_peer_hash_size); - - if (kptllnd_data.kptl_nak_msg != NULL) - LIBCFS_FREE (kptllnd_data.kptl_nak_msg, - offsetof(kptl_msg_t, ptlm_u)); - - memset(&kptllnd_data, 0, sizeof(kptllnd_data)); - - CDEBUG(D_MALLOC, "after LND cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - PORTAL_MODULE_UNUSE; -} - -int __init -kptllnd_module_init (void) -{ - int rc; - - kptllnd_assert_wire_constants(); - - rc = kptllnd_tunables_init(); - if (rc != 0) - return rc; - - kptllnd_init_ptltrace(); - - lnet_register_lnd(&kptllnd_lnd); - - return 0; -} - -void __exit -kptllnd_module_fini (void) -{ - lnet_unregister_lnd(&kptllnd_lnd); - kptllnd_tunables_fini(); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Portals LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kptllnd_module_init); -module_exit(kptllnd_module_fini); diff --git a/lnet/klnds/ptllnd/ptllnd.h b/lnet/klnds/ptllnd/ptllnd.h deleted file mode 100755 index b1d436015df53f7b451abe6e67dcff2d6dfc868e..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd.h +++ /dev/null @@ -1,555 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/random.h> - -#include <net/sock.h> -#include <linux/in.h> - - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> -#include <portals/p30.h> -#ifdef CRAY_XT3 -#include <portals/ptltrace.h> -#endif -#include <lnet/ptllnd.h> /* Depends on portals/p30.h */ - -/* - * Define this to enable console debug logging - * and simulation - */ -//#define PJK_DEBUGGING - -#ifdef CONFIG_SMP -# define PTLLND_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define PTLLND_N_SCHED 1 /* # schedulers */ -#endif - -#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peercredits)-1) - /* when eagerly to return credits */ - -typedef struct -{ - int *kptl_ntx; /* # tx descs to pre-allocate */ - int *kptl_max_nodes; /* max # nodes all talking to me */ - int *kptl_max_procs_per_node; /* max # processes per node */ - int *kptl_checksum; /* checksum kptl_msg_t? 
*/ - int *kptl_timeout; /* comms timeout (seconds) */ - int *kptl_portal; /* portal number */ - int *kptl_pid; /* portals PID (self + kernel peers) */ - int *kptl_rxb_npages; /* number of pages for rx buffer */ - int *kptl_rxb_nspare; /* number of spare rx buffers */ - int *kptl_credits; /* number of credits */ - int *kptl_peercredits; /* number of credits */ - int *kptl_max_msg_size; /* max immd message size*/ - int *kptl_peer_hash_table_size; /* # slots in peer hash table */ - int *kptl_reschedule_loops; /* scheduler yield loops */ - int *kptl_ack_puts; /* make portals ack PUTs */ -#ifdef CRAY_XT3 - int *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? */ - char **kptl_ptltrace_basename; /* ptltrace dump file basename */ -#endif -#ifdef PJK_DEBUGGING - int *kptl_simulation_bitmap;/* simulation bitmap */ -#endif - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *kptl_sysctl; /* sysctl interface */ -#endif -} kptl_tunables_t; - -#include "lnet/ptllnd_wire.h" - -/***********************************************************************/ - -typedef struct kptl_data kptl_data_t; -typedef struct kptl_rx_buffer kptl_rx_buffer_t; -typedef struct kptl_peer kptl_peer_t; - -typedef struct { - char eva_type; -} kptl_eventarg_t; - -#define PTLLND_EVENTARG_TYPE_MSG 0x1 -#define PTLLND_EVENTARG_TYPE_RDMA 0x2 -#define PTLLND_EVENTARG_TYPE_BUF 0x3 - -typedef struct kptl_rx /* receive message */ -{ - struct list_head rx_list; /* queue for attention */ - kptl_rx_buffer_t *rx_rxb; /* the rx buffer pointer */ - kptl_msg_t *rx_msg; /* received message */ - int rx_nob; /* received message size */ - unsigned long rx_treceived; /* time received */ - ptl_process_id_t rx_initiator; /* sender's address */ -#ifdef CRAY_XT3 - ptl_uid_t rx_uid; /* sender's uid */ -#endif - kptl_peer_t *rx_peer; /* pointer to peer */ - char rx_space[0]; /* copy of incoming request */ -} kptl_rx_t; - -#define PTLLND_POSTRX_DONT_POST 0 /* don't post */ -#define 
PTLLND_POSTRX_NO_CREDIT 1 /* post: no credits */ -#define PTLLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */ - -typedef struct kptl_rx_buffer_pool -{ - spinlock_t rxbp_lock; - struct list_head rxbp_list; /* all allocated buffers */ - int rxbp_count; /* # allocated buffers */ - int rxbp_reserved; /* # requests to buffer */ - int rxbp_shutdown; /* shutdown flag */ -} kptl_rx_buffer_pool_t; - -struct kptl_rx_buffer -{ - kptl_rx_buffer_pool_t *rxb_pool; - struct list_head rxb_list; /* for the rxb_pool list */ - struct list_head rxb_repost_list;/* for the kptl_sched_rxbq list */ - int rxb_posted:1; /* on the net */ - int rxb_idle:1; /* all done */ - kptl_eventarg_t rxb_eventarg; /* event->md.user_ptr */ - int rxb_refcount; /* reference count */ - ptl_handle_md_t rxb_mdh; /* the portals memory descriptor (MD) handle */ - char *rxb_buffer; /* the buffer */ - -}; - -enum kptl_tx_type -{ - TX_TYPE_RESERVED = 0, - TX_TYPE_SMALL_MESSAGE = 1, - TX_TYPE_PUT_REQUEST = 2, - TX_TYPE_GET_REQUEST = 3, - TX_TYPE_PUT_RESPONSE = 4, - TX_TYPE_GET_RESPONSE = 5, -}; - -typedef union { -#ifdef _USING_LUSTRE_PORTALS_ - struct iovec iov[PTL_MD_MAX_IOV]; - lnet_kiov_t kiov[PTL_MD_MAX_IOV]; -#else - ptl_md_iovec_t iov[PTL_MD_MAX_IOV]; -#endif -} kptl_fragvec_t; - -typedef struct kptl_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs etc */ - atomic_t tx_refcount; /* reference count*/ - enum kptl_tx_type tx_type; /* small msg/{put,get}{req,resp} */ - int tx_active:1; /* queued on the peer */ - int tx_idle:1; /* on the free list */ - int tx_acked:1; /* portals ACK wanted (for debug only) */ - kptl_eventarg_t tx_msg_eventarg; /* event->md.user_ptr */ - kptl_eventarg_t tx_rdma_eventarg; /* event->md.user_ptr */ - int tx_status; /* the status of this tx descriptor */ - ptl_handle_md_t tx_rdma_mdh; /* RDMA buffer */ - ptl_handle_md_t tx_msg_mdh; /* the portals MD handle for the initial message */ - lnet_msg_t *tx_lnet_msg; /* LNET message to finalize */ - 
lnet_msg_t *tx_lnet_replymsg; /* LNET reply message to finalize */ - kptl_msg_t *tx_msg; /* the message data */ - kptl_peer_t *tx_peer; /* the peer this is waiting on */ - unsigned long tx_deadline; /* deadline */ - unsigned long tx_tposted; /* time posted */ - ptl_md_t tx_rdma_md; /* rdma descriptor */ - kptl_fragvec_t *tx_frags; /* buffer fragments */ -} kptl_tx_t; - -enum kptllnd_peer_state -{ - PEER_STATE_UNINITIALIZED = 0, - PEER_STATE_ALLOCATED = 1, - PEER_STATE_WAITING_HELLO = 2, - PEER_STATE_ACTIVE = 3, - PEER_STATE_CLOSING = 4, - PEER_STATE_ZOMBIE = 5, -}; - -struct kptl_peer -{ - struct list_head peer_list; - atomic_t peer_refcount; /* The current refrences */ - enum kptllnd_peer_state peer_state; - spinlock_t peer_lock; /* serialize */ - struct list_head peer_noops; /* PTLLND_MSG_TYPE_NOOP txs */ - struct list_head peer_sendq; /* txs waiting for mh handles */ - struct list_head peer_activeq; /* txs awaiting completion */ - lnet_process_id_t peer_id; /* Peer's LNET id */ - ptl_process_id_t peer_ptlid; /* Peer's portals id */ - __u64 peer_incarnation; /* peer's incarnation */ - __u64 peer_myincarnation; /* my incarnation at HELLO */ - int peer_sent_hello; /* have I sent HELLO? */ - int peer_credits; /* number of send credits */ - int peer_outstanding_credits;/* number of peer credits to return */ - int peer_sent_credits; /* #msg buffers posted for peer */ - int peer_max_msg_size; /* peer's rx buffer size */ - int peer_error; /* errno on closing this peer */ - int peer_retry_noop; /* need to retry returning credits */ - int peer_check_stamp; /* watchdog check stamp */ - cfs_time_t peer_last_alive; /* when (in jiffies) I was last alive */ - __u64 peer_next_matchbits; /* Next value to register RDMA from peer */ - __u64 peer_last_matchbits_seen; /* last matchbits used to RDMA to peer */ -}; - -struct kptl_data -{ - int kptl_init; /* initialisation state */ - volatile int kptl_shutdown; /* shut down? 
*/ - atomic_t kptl_nthreads; /* # live threads */ - lnet_ni_t *kptl_ni; /* _the_ LND instance */ - ptl_handle_ni_t kptl_nih; /* network inteface handle */ - ptl_process_id_t kptl_portals_id; /* Portals ID of interface */ - __u64 kptl_incarnation; /* which one am I */ - ptl_handle_eq_t kptl_eqh; /* Event Queue (EQ) */ - - spinlock_t kptl_sched_lock; /* serialise... */ - wait_queue_head_t kptl_sched_waitq; /* schedulers sleep here */ - struct list_head kptl_sched_txq; /* tx requiring attention */ - struct list_head kptl_sched_rxq; /* rx requiring attention */ - struct list_head kptl_sched_rxbq; /* rxb requiring reposting */ - - wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */ - - kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */ - cfs_mem_cache_t* kptl_rx_cache; /* rx descripter cache */ - - atomic_t kptl_ntx; /* # tx descs allocated */ - spinlock_t kptl_tx_lock; /* serialise idle tx list*/ - struct list_head kptl_idle_txs; /* idle tx descriptors */ - - rwlock_t kptl_peer_rw_lock; /* lock for peer table */ - struct list_head *kptl_peers; /* hash table of all my known peers */ - struct list_head kptl_closing_peers; /* peers being closed */ - struct list_head kptl_zombie_peers; /* peers waiting for refs to drain */ - int kptl_peer_hash_size; /* size of kptl_peers */ - int kptl_npeers; /* # peers extant */ - int kptl_n_active_peers; /* # active peers */ - int kptl_expected_peers; /* # peers I can buffer HELLOs from */ - - kptl_msg_t *kptl_nak_msg; /* common NAK message */ - spinlock_t kptl_ptlid2str_lock; /* serialise str ops */ -}; - -enum -{ - PTLLND_INIT_NOTHING = 0, - PTLLND_INIT_DATA, - PTLLND_INIT_ALL, -}; - -extern kptl_tunables_t kptllnd_tunables; -extern kptl_data_t kptllnd_data; - -static inline lnet_nid_t -kptllnd_ptl2lnetnid(ptl_nid_t ptl_nid) -{ -#ifdef _USING_LUSTRE_PORTALS_ - return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid), - LNET_NIDADDR(ptl_nid)); -#else - return 
LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid), - ptl_nid); -#endif -} - -static inline ptl_nid_t -kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) -{ -#ifdef _USING_LUSTRE_PORTALS_ - return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_portals_id.nid), - LNET_NIDADDR(lnet_nid)); -#else - return LNET_NIDADDR(lnet_nid); -#endif -} - -int kptllnd_startup(lnet_ni_t *ni); -void kptllnd_shutdown(lnet_ni_t *ni); -int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int kptllnd_eager_recv(struct lnet_ni *ni, void *private, - lnet_msg_t *msg, void **new_privatep); -void kptllnd_eq_callback(ptl_event_t *evp); -int kptllnd_scheduler(void *arg); -int kptllnd_watchdog(void *arg); -int kptllnd_thread_start(int (*fn)(void *arg), void *arg); -int kptllnd_tunables_init(void); -void kptllnd_tunables_fini(void); - -const char *kptllnd_evtype2str(int evtype); -const char *kptllnd_msgtype2str(int msgtype); -const char *kptllnd_errtype2str(int errtype); - -static inline void * -kptllnd_eventarg2obj (kptl_eventarg_t *eva) -{ - switch (eva->eva_type) { - default: - LBUG(); - case PTLLND_EVENTARG_TYPE_BUF: - return list_entry(eva, kptl_rx_buffer_t, rxb_eventarg); - case PTLLND_EVENTARG_TYPE_RDMA: - return list_entry(eva, kptl_tx_t, tx_rdma_eventarg); - case PTLLND_EVENTARG_TYPE_MSG: - return list_entry(eva, kptl_tx_t, tx_msg_eventarg); - } -} - -/* - * RX BUFFER SUPPORT FUNCTIONS - */ -void kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp); -void kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp); -int kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count); -void kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, int count); -void kptllnd_rx_buffer_callback(ptl_event_t *ev); 
-void kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb); - -static inline int -kptllnd_rx_buffer_size(void) -{ - return PAGE_SIZE * (*kptllnd_tunables.kptl_rxb_npages); -} - -static inline void -kptllnd_rx_buffer_addref(kptl_rx_buffer_t *rxb) -{ - unsigned long flags; - - spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags); - rxb->rxb_refcount++; - spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags); -} - -static inline void -kptllnd_rx_buffer_decref_locked(kptl_rx_buffer_t *rxb) -{ - if (--(rxb->rxb_refcount) == 0) { - spin_lock(&kptllnd_data.kptl_sched_lock); - - list_add_tail(&rxb->rxb_repost_list, - &kptllnd_data.kptl_sched_rxbq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock(&kptllnd_data.kptl_sched_lock); - } -} - -static inline void -kptllnd_rx_buffer_decref(kptl_rx_buffer_t *rxb) -{ - unsigned long flags; - int count; - - spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags); - count = --(rxb->rxb_refcount); - spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags); - - if (count == 0) - kptllnd_rx_buffer_post(rxb); -} - -/* - * RX SUPPORT FUNCTIONS - */ -void kptllnd_rx_parse(kptl_rx_t *rx); -void kptllnd_rx_done(kptl_rx_t *rx, int post_credit); - -/* - * PEER SUPPORT FUNCTIONS - */ -int kptllnd_get_peer_info(int index, - lnet_process_id_t *id, - int *state, int *sent_hello, - int *refcount, __u64 *incarnation, - __u64 *next_matchbits, __u64 *last_matchbits_seen, - int *nsendq, int *nactiveq, - int *credits, int *outstanding_credits); -void kptllnd_peer_destroy(kptl_peer_t *peer); -int kptllnd_peer_del(lnet_process_id_t id); -void kptllnd_peer_close_locked(kptl_peer_t *peer, int why); -void kptllnd_peer_close(kptl_peer_t *peer, int why); -void kptllnd_handle_closing_peers(void); -int kptllnd_peer_connect(kptl_tx_t *tx, lnet_nid_t nid); -void kptllnd_peer_check_sends(kptl_peer_t *peer); -void kptllnd_peer_check_bucket(int idx, int stamp); -void kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag); -int kptllnd_find_target(kptl_peer_t 
**peerp, lnet_process_id_t target); -kptl_peer_t *kptllnd_peer_handle_hello(ptl_process_id_t initiator, - kptl_msg_t *msg); -kptl_peer_t *kptllnd_id2peer_locked(lnet_process_id_t id); -void kptllnd_peer_alive(kptl_peer_t *peer); - -static inline void -kptllnd_peer_addref (kptl_peer_t *peer) -{ - atomic_inc(&peer->peer_refcount); -} - -static inline void -kptllnd_peer_decref (kptl_peer_t *peer) -{ - if (atomic_dec_and_test(&peer->peer_refcount)) - kptllnd_peer_destroy(peer); -} - -static inline void -kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer) -{ - LASSERT (tx->tx_peer == NULL); - - kptllnd_peer_addref(peer); - tx->tx_peer = peer; -} - -static inline struct list_head * -kptllnd_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % - kptllnd_data.kptl_peer_hash_size; - - return &kptllnd_data.kptl_peers[hash]; -} - -static inline kptl_peer_t * -kptllnd_id2peer(lnet_process_id_t id) -{ - kptl_peer_t *peer; - unsigned long flags; - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - peer = kptllnd_id2peer_locked(id); - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return peer; -} - -static inline int -kptllnd_reserve_buffers(int n) -{ - return kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool, - n); -} - -static inline int -kptllnd_peer_reserve_buffers(void) -{ - return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peercredits); -} - -static inline void -kptllnd_peer_unreserve_buffers(void) -{ - kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool, - *kptllnd_tunables.kptl_peercredits); -} - -/* - * TX SUPPORT FUNCTIONS - */ -int kptllnd_setup_tx_descs(void); -void kptllnd_cleanup_tx_descs(void); -void kptllnd_tx_fini(kptl_tx_t *tx); -kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose); -void kptllnd_tx_callback(ptl_event_t *ev); -const char *kptllnd_tx_typestr(int type); - -static inline void -kptllnd_tx_addref(kptl_tx_t *tx) -{ - atomic_inc(&tx->tx_refcount); -} - 
-static inline void -kptllnd_tx_decref(kptl_tx_t *tx) -{ - LASSERT (!in_interrupt()); /* Thread context only */ - - if (atomic_dec_and_test(&tx->tx_refcount)) - kptllnd_tx_fini(tx); -} - -/* - * MESSAGE SUPPORT FUNCTIONS - */ -void kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob); -void kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer); -int kptllnd_msg_unpack(kptl_msg_t *msg, int nob); - -/* - * MISC SUPPORT FUNCTIONS - */ -void kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob); -char *kptllnd_ptlid2str(ptl_process_id_t id); - -void kptllnd_init_ptltrace(void); -void kptllnd_dump_ptltrace(void); - -#ifdef PJK_DEBUGGING -#define SIMULATION_FAIL_TX_PUT_ALLOC 0 /* 0x00000001 */ -#define SIMULATION_FAIL_TX_GET_ALLOC 1 /* 0x00000002 */ -#define SIMULATION_FAIL_TX 2 /* 0x00000004 */ -#define SIMULATION_FAIL_RX_ALLOC 3 /* 0x00000008 */ - -#define IS_SIMULATION_ENABLED(x) \ - (((*kptllnd_tunables.kptl_simulation_bitmap) & 1<< SIMULATION_##x) != 0) -#else -#define IS_SIMULATION_ENABLED(x) 0 -#endif - diff --git a/lnet/klnds/ptllnd/ptllnd_cb.c b/lnet/klnds/ptllnd/ptllnd_cb.c deleted file mode 100644 index 1903fc68b3983716afe0989d98998e515cf34940..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_cb.c +++ /dev/null @@ -1,813 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. 
- * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -#include "ptllnd.h" - -#ifndef _USING_LUSTRE_PORTALS_ -int -kptllnd_extract_iov (int dst_niov, ptl_md_iovec_t *dst, - int src_niov, struct iovec *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return (niov); - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} - -int -kptllnd_extract_phys (int dst_niov, ptl_md_iovec_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the physical addresses of the subset of 'src' - * starting at 'offset', for exactly 'len' bytes, and return the number - * of entries. 
NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - __u64 phys_page; - __u64 phys; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = min(src->kiov_len - offset, len); - phys_page = lnet_page2phys(src->kiov_page); - phys = phys_page + src->kiov_offset + offset; - - LASSERT (sizeof(void *) > 4 || - (phys <= 0xffffffffULL && - phys + (frag_len - 1) <= 0xffffffffULL)); - - dst->iov_base = (void *)((unsigned long)phys); - dst->iov_len = frag_len; - - if (frag_len == len) - return niov; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -#endif - -void -kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob) -{ - LASSERT (iov == NULL || kiov == NULL); - - memset(&tx->tx_rdma_md, 0, sizeof(tx->tx_rdma_md)); - - tx->tx_rdma_md.start = tx->tx_frags; - tx->tx_rdma_md.user_ptr = &tx->tx_rdma_eventarg; - tx->tx_rdma_md.eq_handle = kptllnd_data.kptl_eqh; - tx->tx_rdma_md.options = PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE; - switch (tx->tx_type) { - default: - LBUG(); - - case TX_TYPE_PUT_REQUEST: /* passive: peer gets */ - tx->tx_rdma_md.threshold = 1; /* GET event */ - tx->tx_rdma_md.options |= PTL_MD_OP_GET; - break; - - case TX_TYPE_GET_REQUEST: /* passive: peer puts */ - tx->tx_rdma_md.threshold = 1; /* PUT event */ - tx->tx_rdma_md.options |= PTL_MD_OP_PUT; - break; - - case TX_TYPE_PUT_RESPONSE: /* active: I get */ - tx->tx_rdma_md.threshold = 2; /* SEND + REPLY */ - break; - - case TX_TYPE_GET_RESPONSE: /* active: I put */ - tx->tx_rdma_md.threshold = tx->tx_acked ? 2 : 1; /* SEND + ACK? 
*/ - break; - } - - if (nob == 0) { - tx->tx_rdma_md.length = 0; - return; - } - -#ifdef _USING_LUSTRE_PORTALS_ - if (iov != NULL) { - tx->tx_rdma_md.options |= PTL_MD_IOVEC; - tx->tx_rdma_md.length = - lnet_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, iov, offset, nob); - return; - } - - /* Cheating OK since ptl_kiov_t == lnet_kiov_t */ - CLASSERT(sizeof(ptl_kiov_t) == sizeof(lnet_kiov_t)); - CLASSERT(offsetof(ptl_kiov_t, kiov_offset) == - offsetof(lnet_kiov_t, kiov_offset)); - CLASSERT(offsetof(ptl_kiov_t, kiov_page) == - offsetof(lnet_kiov_t, kiov_page)); - CLASSERT(offsetof(ptl_kiov_t, kiov_len) == - offsetof(lnet_kiov_t, kiov_len)); - - tx->tx_rdma_md.options |= PTL_MD_KIOV; - tx->tx_rdma_md.length = - lnet_extract_kiov(PTL_MD_MAX_IOV, tx->tx_frags->kiov, - niov, kiov, offset, nob); -#else - if (iov != NULL) { - tx->tx_rdma_md.options |= PTL_MD_IOVEC; - tx->tx_rdma_md.length = - kptllnd_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, iov, offset, nob); - return; - } - - tx->tx_rdma_md.options |= PTL_MD_IOVEC | PTL_MD_PHYS; - tx->tx_rdma_md.length = - kptllnd_extract_phys(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, kiov, offset, nob); -#endif -} - -int -kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, int nob) -{ - kptl_tx_t *tx; - ptl_err_t ptlrc; - kptl_msg_t *rxmsg = rx->rx_msg; - kptl_peer_t *peer = rx->rx_peer; - unsigned long flags; - ptl_handle_md_t mdh; - - LASSERT (type == TX_TYPE_PUT_RESPONSE || - type == TX_TYPE_GET_RESPONSE); - - tx = kptllnd_get_idle_tx(type); - if (tx == NULL) { - CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n", - type == TX_TYPE_PUT_RESPONSE ? 
"GET" : "PUT", - libcfs_id2str(peer->peer_id)); - return -ENOMEM; - } - - kptllnd_set_tx_peer(tx, peer); - kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob); - - ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, - PTL_UNLINK, &mdh); - if (ptlrc != PTL_OK) { - CERROR("PtlMDBind(%s) failed: %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(ptlrc), ptlrc); - tx->tx_status = -EIO; - kptllnd_tx_decref(tx); - return -EIO; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - tx->tx_lnet_msg = lntmsg; - /* lnet_finalize() will be called when tx is torn down, so I must - * return success from here on... */ - - tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ); - tx->tx_rdma_mdh = mdh; - tx->tx_active = 1; - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - /* peer has now got my ref on 'tx' */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - tx->tx_tposted = jiffies; - - if (type == TX_TYPE_GET_RESPONSE) - ptlrc = PtlPut(mdh, - tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ, - rx->rx_initiator, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - rxmsg->ptlm_u.rdma.kptlrm_matchbits, - 0, /* offset */ - (lntmsg != NULL) ? /* header data */ - PTLLND_RDMA_OK : - PTLLND_RDMA_FAIL); - else - ptlrc = PtlGet(mdh, - rx->rx_initiator, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - rxmsg->ptlm_u.rdma.kptlrm_matchbits, - 0); /* offset */ - - if (ptlrc != PTL_OK) { - CERROR("Ptl%s failed: %s(%d)\n", - (type == TX_TYPE_GET_RESPONSE) ? 
"Put" : "Get", - kptllnd_errtype2str(ptlrc), ptlrc); - - kptllnd_peer_close(peer, -EIO); - /* Everything (including this RDMA) queued on the peer will - * be completed with failure */ - } - - return 0; -} - -int -kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kptl_peer_t *peer; - kptl_tx_t *tx; - int nob; - int nfrag; - int rc; - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); /* !!! */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - LASSERT (!in_interrupt()); - - rc = kptllnd_find_target(&peer, target); - if (rc != 0) - return rc; - - switch (type) { - default: - LBUG(); - return -EINVAL; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Should the payload avoid RDMA? 
*/ - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[payload_nob]); - if (payload_kiov == NULL && - nob <= peer->peer_max_msg_size) - break; - - tx = kptllnd_get_idle_tx(TX_TYPE_PUT_REQUEST); - if (tx == NULL) { - CERROR("Can't send %s to %s: can't allocate descriptor\n", - lnet_msgtyp2str(type), - libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - kptllnd_init_rdma_md(tx, payload_niov, - payload_iov, payload_kiov, - payload_offset, payload_nob); - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr; - kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_PUT, - sizeof(kptl_rdma_msg_t)); - - CDEBUG(D_NETTRACE, "%s: passive PUT p %d %p\n", - libcfs_id2str(target), - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx); - - kptllnd_tx_launch(peer, tx, 0); - goto out; - - case LNET_MSG_GET: - /* routed gets don't RDMA */ - if (target_is_router || routing) - break; - - /* Is the payload small enough not to need RDMA? */ - nob = lntmsg->msg_md->md_length; - nob = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload[nob]); - if (nob <= peer->peer_max_msg_size) - break; - - tx = kptllnd_get_idle_tx(TX_TYPE_GET_REQUEST); - if (tx == NULL) { - CERROR("Can't send GET to %s: can't allocate descriptor\n", - libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - tx->tx_lnet_replymsg = - lnet_create_reply_msg(kptllnd_data.kptl_ni, lntmsg); - if (tx->tx_lnet_replymsg == NULL) { - CERROR("Failed to allocate LNET reply for %s\n", - libcfs_id2str(target)); - kptllnd_tx_decref(tx); - rc = -ENOMEM; - goto out; - } - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, NULL, - 0, lntmsg->msg_md->md_length); - else - kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov, - NULL, lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr; - kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_GET, - 
sizeof(kptl_rdma_msg_t)); - - CDEBUG(D_NETTRACE, "%s: passive GET p %d %p\n", - libcfs_id2str(target), - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx); - - kptllnd_tx_launch(peer, tx, 0); - goto out; - - case LNET_MSG_ACK: - CDEBUG(D_NET, "LNET_MSG_ACK\n"); - LASSERT (payload_nob == 0); - break; - } - - /* I don't have to handle kiovs */ - LASSERT (payload_nob == 0 || payload_iov != NULL); - - tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (tx == NULL) { - CERROR("Can't send %s to %s: can't allocate descriptor\n", - lnet_msgtyp2str(type), libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.immediate.kptlim_hdr = *hdr; - - if (payload_nob == 0) { - nfrag = 0; - } else { - tx->tx_frags->iov[0].iov_base = tx->tx_msg; - tx->tx_frags->iov[0].iov_len = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload); - - /* NB relying on lustre not asking for PTL_MD_MAX_IOV - * fragments!! */ -#ifdef _USING_LUSTRE_PORTALS_ - nfrag = 1 + lnet_extract_iov(PTL_MD_MAX_IOV - 1, - &tx->tx_frags->iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); -#else - nfrag = 1 + kptllnd_extract_iov(PTL_MD_MAX_IOV - 1, - &tx->tx_frags->iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); -#endif - } - - nob = offsetof(kptl_immediate_msg_t, kptlim_payload[payload_nob]); - kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_IMMEDIATE, nob); - - CDEBUG(D_NETTRACE, "%s: immediate %s p %d %p\n", - libcfs_id2str(target), - lnet_msgtyp2str(lntmsg->msg_type), - (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_PUT) ? - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index) : - (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_GET) ? 
- le32_to_cpu(lntmsg->msg_hdr.msg.get.ptl_index) : -1, - tx); - - kptllnd_tx_launch(peer, tx, nfrag); - - out: - kptllnd_peer_decref(peer); - return rc; -} - -int -kptllnd_eager_recv(struct lnet_ni *ni, void *private, - lnet_msg_t *msg, void **new_privatep) -{ - kptl_rx_t *rx = private; - - CDEBUG(D_NET, "Eager RX=%p RXB=%p\n", rx, rx->rx_rxb); - - /* I have to release my ref on rxb (if I have one) to ensure I'm an - * eager receiver, so I copy the incoming request from the buffer it - * landed in, into space reserved in the descriptor... */ - -#if (PTL_MD_LOCAL_ALIGN8 == 0) - if (rx->rx_rxb == NULL) /* already copied */ - return 0; /* to fix alignment */ -#else - LASSERT(rx->rx_rxb != NULL); -#endif - LASSERT(rx->rx_nob <= *kptllnd_tunables.kptl_max_msg_size); - - memcpy(rx->rx_space, rx->rx_msg, rx->rx_nob); - rx->rx_msg = (kptl_msg_t *)rx->rx_space; - - kptllnd_rx_buffer_decref(rx->rx_rxb); - rx->rx_rxb = NULL; - - return 0; -} - - -int -kptllnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kptl_rx_t *rx = private; - kptl_msg_t *rxmsg = rx->rx_msg; - int nob; - int rc; - - CDEBUG(D_NET, "%s niov=%d offset=%d mlen=%d rlen=%d\n", - kptllnd_msgtype2str(rxmsg->ptlm_type), - niov, offset, mlen, rlen); - - LASSERT (mlen <= rlen); - LASSERT (mlen >= 0); - LASSERT (!in_interrupt()); - LASSERT (!(kiov != NULL && iov != NULL)); /* never both */ - LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! */ - -#ifdef CRAY_XT3 - if (lntmsg != NULL && - rx->rx_uid != 0) { - /* Set the UID if the sender's uid isn't 0; i.e. non-root - * running in userspace (e.g. a catamount node; linux kernel - * senders, including routers have uid 0). If this is a lustre - * RPC request, this tells lustre not to trust the creds in the - * RPC message body. 
*/ - lnet_set_msg_uid(ni, lntmsg, rx->rx_uid); - } -#endif - switch(rxmsg->ptlm_type) - { - default: - LBUG(); - rc = -EINVAL; - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE %d,%d\n", mlen, rlen); - - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_id2str(rx->rx_peer->peer_id), nob, - rx->rx_nob); - rc = -EINVAL; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov( - niov, kiov, offset, - *kptllnd_tunables.kptl_max_msg_size, - rxmsg->ptlm_u.immediate.kptlim_payload, - 0, - mlen); - else - lnet_copy_flat2iov( - niov, iov, offset, - *kptllnd_tunables.kptl_max_msg_size, - rxmsg->ptlm_u.immediate.kptlim_payload, - 0, - mlen); - - lnet_finalize (ni, lntmsg, 0); - rc = 0; - break; - - case PTLLND_MSG_TYPE_GET: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET %d,%d\n", mlen, rlen); - - /* NB always send RDMA so the peer can complete. I send - * success/failure in the portals 'hdr_data' */ - - if (lntmsg == NULL) - rc = kptllnd_active_rdma(rx, NULL, - TX_TYPE_GET_RESPONSE, - 0, NULL, NULL, 0, 0); - else - rc = kptllnd_active_rdma(rx, lntmsg, - TX_TYPE_GET_RESPONSE, - lntmsg->msg_niov, - lntmsg->msg_iov, - lntmsg->msg_kiov, - lntmsg->msg_offset, - lntmsg->msg_len); - break; - - case PTLLND_MSG_TYPE_PUT: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT %d,%d\n", mlen, rlen); - - /* NB always send RDMA so the peer can complete; it'll be 0 - * bytes if there was no match (lntmsg == NULL). I have no way - * to let my peer know this, but she's only interested in when - * the net has stopped accessing her buffer in any case. 
*/ - - rc = kptllnd_active_rdma(rx, lntmsg, TX_TYPE_PUT_RESPONSE, - niov, iov, kiov, offset, mlen); - break; - } - - /* - * We're done with the RX - */ - kptllnd_rx_done(rx, PTLLND_POSTRX_PEER_CREDIT); - return rc; -} - -void -kptllnd_eq_callback(ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - - switch (eva->eva_type) { - default: - LBUG(); - - case PTLLND_EVENTARG_TYPE_MSG: - case PTLLND_EVENTARG_TYPE_RDMA: - kptllnd_tx_callback(ev); - break; - - case PTLLND_EVENTARG_TYPE_BUF: - kptllnd_rx_buffer_callback(ev); - break; - } -} - -void -kptllnd_thread_fini (void) -{ - atomic_dec(&kptllnd_data.kptl_nthreads); -} - -int -kptllnd_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid; - - atomic_inc(&kptllnd_data.kptl_nthreads); - - pid = kernel_thread (fn, arg, 0); - if (pid >= 0) - return 0; - - CERROR("Failed to start kernel_thread: error %d\n", (int)pid); - kptllnd_thread_fini(); - return (int)pid; -} - -int -kptllnd_watchdog(void *arg) -{ - int id = (long)arg; - char name[16]; - wait_queue_t waitlink; - int stamp = 0; - int peer_index = 0; - unsigned long deadline = jiffies; - int timeout; - int i; - - snprintf(name, sizeof(name), "kptllnd_wd_%02d", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&waitlink, current); - - /* threads shut down in phase 2 after all peers have been destroyed */ - while (kptllnd_data.kptl_shutdown < 2) { - - timeout = (int)(deadline - jiffies); - - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kptllnd_data.kptl_peer_hash_size; - - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
*/ - - if ((*kptllnd_tunables.kptl_timeout) > n * p) - chunk = (chunk * n * p) / - (*kptllnd_tunables.kptl_timeout); - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kptllnd_peer_check_bucket(peer_index, stamp); - peer_index = (peer_index + 1) % - kptllnd_data.kptl_peer_hash_size; - } - - deadline += p * HZ; - stamp++; - continue; - } - - kptllnd_handle_closing_peers(); - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kptllnd_data.kptl_watchdog_waitq, - &waitlink); - - schedule_timeout(timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue(&kptllnd_data.kptl_watchdog_waitq, &waitlink); - } - - kptllnd_thread_fini(); - CDEBUG(D_NET, "<<<\n"); - return (0); -}; - -int -kptllnd_scheduler (void *arg) -{ - int id = (long)arg; - char name[16]; - wait_queue_t waitlink; - unsigned long flags; - int did_something; - int counter = 0; - kptl_rx_t *rx; - kptl_rx_buffer_t *rxb; - kptl_tx_t *tx; - - snprintf(name, sizeof(name), "kptllnd_sd_%02d", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&waitlink, current); - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - /* threads shut down in phase 2 after all peers have been destroyed */ - while (kptllnd_data.kptl_shutdown < 2) { - - did_something = 0; - - if (!list_empty(&kptllnd_data.kptl_sched_rxq)) { - rx = list_entry (kptllnd_data.kptl_sched_rxq.next, - kptl_rx_t, rx_list); - list_del(&rx->rx_list); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - - kptllnd_rx_parse(rx); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - } - - if (!list_empty(&kptllnd_data.kptl_sched_rxbq)) { - rxb = list_entry (kptllnd_data.kptl_sched_rxbq.next, - kptl_rx_buffer_t, rxb_repost_list); - list_del(&rxb->rxb_repost_list); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - - kptllnd_rx_buffer_post(rxb); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); 
- } - - if (!list_empty(&kptllnd_data.kptl_sched_txq)) { - tx = list_entry (kptllnd_data.kptl_sched_txq.next, - kptl_tx_t, tx_list); - list_del_init(&tx->tx_list); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - - kptllnd_tx_fini(tx); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - } - - if (did_something) { - if (++counter != *kptllnd_tunables.kptl_reschedule_loops) - continue; - } - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kptllnd_data.kptl_sched_waitq, - &waitlink); - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - - if (!did_something) - schedule(); - else - cond_resched(); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&kptllnd_data.kptl_sched_waitq, &waitlink); - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - counter = 0; - } - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - - kptllnd_thread_fini(); - return 0; -} - diff --git a/lnet/klnds/ptllnd/ptllnd_modparams.c b/lnet/klnds/ptllnd/ptllnd_modparams.c deleted file mode 100644 index 16e0c4afa5a9a9c47806070d58cb796588d17006..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_modparams.c +++ /dev/null @@ -1,333 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - - -#include "ptllnd.h" - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of TX descriptors"); - -static int max_nodes = 1152; -CFS_MODULE_PARM(max_nodes, "i", int, 0444, - "maximum number of peer nodes"); - -static int max_procs_per_node = 2; -CFS_MODULE_PARM(max_procs_per_node, "i", int, 0444, - "maximum number of processes per peer node to cache"); - -static int checksum = 0; -CFS_MODULE_PARM(checksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int portal = PTLLND_PORTAL; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(portal, "i", int, 0444, - "portal id"); - -static int pid = PTLLND_PID; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(pid, "i", int, 0444, - "portals pid"); - -static int rxb_npages = 1; -CFS_MODULE_PARM(rxb_npages, "i", int, 0444, - "# of pages per rx buffer"); - -static int rxb_nspare = 8; -CFS_MODULE_PARM(rxb_nspare, "i", int, 0444, - "# of spare rx buffers"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "concurrent sends"); - -static int peercredits = PTLLND_PEERCREDITS; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(peercredits, "i", int, 0444, - "concurrent sends to 1 peer"); - -static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(max_msg_size, "i", int, 0444, - "max size of immediate message"); - -static int peer_hash_table_size = 101; -CFS_MODULE_PARM(peer_hash_table_size, "i", int, 0444, - "# of slots in the peer hash table"); - -static int reschedule_loops = 100; -CFS_MODULE_PARM(reschedule_loops, "i", int, 0644, - "# of loops before scheduler does cond_resched()"); - -static int ack_puts = 0; -CFS_MODULE_PARM(ack_puts, "i", int, 0644, - "get portals to ack all PUTs"); - -#ifdef CRAY_XT3 -static int ptltrace_on_timeout = 0; -CFS_MODULE_PARM(ptltrace_on_timeout, "i", int, 0644, - "dump ptltrace on timeout"); - -static 
char *ptltrace_basename = "/tmp/lnet-ptltrace"; -CFS_MODULE_PARM(ptltrace_basename, "s", charp, 0644, - "ptltrace dump file basename"); -#endif -#ifdef PJK_DEBUGGING -static int simulation_bitmap = 0; -CFS_MODULE_PARM(simulation_bitmap, "i", int, 0444, - "simulation bitmap"); -#endif - - -kptl_tunables_t kptllnd_tunables = { - .kptl_ntx = &ntx, - .kptl_max_nodes = &max_nodes, - .kptl_max_procs_per_node = &max_procs_per_node, - .kptl_checksum = &checksum, - .kptl_portal = &portal, - .kptl_pid = &pid, - .kptl_timeout = &timeout, - .kptl_rxb_npages = &rxb_npages, - .kptl_rxb_nspare = &rxb_nspare, - .kptl_credits = &credits, - .kptl_peercredits = &peercredits, - .kptl_max_msg_size = &max_msg_size, - .kptl_peer_hash_table_size = &peer_hash_table_size, - .kptl_reschedule_loops = &reschedule_loops, - .kptl_ack_puts = &ack_puts, -#ifdef CRAY_XT3 - .kptl_ptltrace_on_timeout = &ptltrace_on_timeout, - .kptl_ptltrace_basename = &ptltrace_basename, -#endif -#ifdef PJK_DEBUGGING - .kptl_simulation_bitmap = &simulation_bitmap, -#endif -}; - - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM -#ifdef CRAY_XT3 -static char ptltrace_basename_space[1024]; - -static void -kptllnd_init_strtunable(char **str_param, char *space, int size) -{ - strncpy(space, *str_param, size); - space[size - 1] = 0; - *str_param = space; -} -#endif - -static cfs_sysctl_table_t kptllnd_ctl_table[] = { - { - .ctl_name = 1, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 2, - .procname = "max_nodes", - .data = &max_nodes, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 3, - .procname = "max_procs_per_node", - .data = &max_procs_per_node, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "checksum", - .data = &checksum, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - 
.ctl_name = 5, - .procname = "timeout", - .data = &timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "portal", - .data = &portal, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "pid", - .data = &pid, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 8, - .procname = "rxb_npages", - .data = &rxb_npages, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 9, - .procname = "credits", - .data = &credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 10, - .procname = "peercredits", - .data = &peercredits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 11, - .procname = "max_msg_size", - .data = &max_msg_size, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 12, - .procname = "peer_hash_table_size", - .data = &peer_hash_table_size, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 13, - .procname = "reschedule_loops", - .data = &reschedule_loops, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 14, - .procname = "ack_puts", - .data = &ack_puts, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#ifdef CRAY_XT3 - { - .ctl_name = 15, - .procname = "ptltrace_on_timeout", - .data = &ptltrace_on_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 16, - .procname = "ptltrace_basename", - .data = ptltrace_basename_space, - .maxlen = sizeof(ptltrace_basename_space), - .mode = 0644, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string - }, -#endif -#ifdef PJK_DEBUGGING - { - .ctl_name = 17, - .procname = 
"simulation_bitmap", - .data = &simulation_bitmap, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, -#endif - - {0} -}; - -static cfs_sysctl_table_t kptllnd_top_ctl_table[] = { - { - .ctl_name = 203, - .procname = "ptllnd", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kptllnd_ctl_table - }, - {0} -}; - -int -kptllnd_tunables_init () -{ -#ifdef CRAY_XT3 - kptllnd_init_strtunable(&ptltrace_basename, - ptltrace_basename_space, - sizeof(ptltrace_basename_space)); -#endif - kptllnd_tunables.kptl_sysctl = - cfs_register_sysctl_table(kptllnd_top_ctl_table, 0); - - if (kptllnd_tunables.kptl_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kptllnd_tunables_fini () -{ - if (kptllnd_tunables.kptl_sysctl != NULL) - cfs_unregister_sysctl_table(kptllnd_tunables.kptl_sysctl); -} - -#else - -int -kptllnd_tunables_init () -{ - return 0; -} - -void -kptllnd_tunables_fini () -{ -} - -#endif - diff --git a/lnet/klnds/ptllnd/ptllnd_peer.c b/lnet/klnds/ptllnd/ptllnd_peer.c deleted file mode 100644 index f4e67f4bd72ad609353b9101ae88186a0cd92748..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_peer.c +++ /dev/null @@ -1,1334 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * E Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - -#include "ptllnd.h" -#include <libcfs/list.h> - -static int -kptllnd_count_queue(struct list_head *q) -{ - struct list_head *e; - int n = 0; - - list_for_each(e, q) { - n++; - } - - return n; -} - -int -kptllnd_get_peer_info(int index, - lnet_process_id_t *id, - int *state, int *sent_hello, - int *refcount, __u64 *incarnation, - __u64 *next_matchbits, __u64 *last_matchbits_seen, - int *nsendq, int *nactiveq, - int *credits, int *outstanding_credits) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - unsigned long flags; - struct list_head *ptmp; - kptl_peer_t *peer; - int i; - int rc = -ENOENT; - - read_lock_irqsave(g_lock, flags); - - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) { - - list_for_each (ptmp, &kptllnd_data.kptl_peers[i]) { - peer = list_entry(ptmp, kptl_peer_t, peer_list); - - if (index-- > 0) - continue; - - *id = peer->peer_id; - *state = peer->peer_state; - *sent_hello = peer->peer_sent_hello; - *refcount = atomic_read(&peer->peer_refcount); - *incarnation = peer->peer_incarnation; - - spin_lock(&peer->peer_lock); - - *next_matchbits = peer->peer_next_matchbits; - *last_matchbits_seen = peer->peer_last_matchbits_seen; - *credits = peer->peer_credits; - *outstanding_credits = peer->peer_outstanding_credits; - - *nsendq = kptllnd_count_queue(&peer->peer_sendq); - *nactiveq = kptllnd_count_queue(&peer->peer_activeq); - - spin_unlock(&peer->peer_lock); - - rc = 0; - goto out; - } - } - - out: - read_unlock_irqrestore(g_lock, flags); - return rc; -} - -void -kptllnd_peer_add_peertable_locked (kptl_peer_t *peer) -{ - LASSERT (!kptllnd_data.kptl_shutdown); - LASSERT (kptllnd_data.kptl_n_active_peers < - kptllnd_data.kptl_expected_peers); - - LASSERT (peer->peer_state == PEER_STATE_WAITING_HELLO || - peer->peer_state == PEER_STATE_ACTIVE); - - kptllnd_data.kptl_n_active_peers++; - atomic_inc(&peer->peer_refcount); /* +1 ref for the list */ - - /* NB add to HEAD of peer list for MRU order! 
- * (see kptllnd_cull_peertable) */ - list_add(&peer->peer_list, kptllnd_nid2peerlist(peer->peer_id.nid)); -} - -void -kptllnd_cull_peertable_locked (lnet_process_id_t pid) -{ - /* I'm about to add a new peer with this portals ID to the peer table, - * so (a) this peer should not exist already and (b) I want to leave at - * most (max_procs_per_nid - 1) peers with this NID in the table. */ - struct list_head *peers = kptllnd_nid2peerlist(pid.nid); - int cull_count = *kptllnd_tunables.kptl_max_procs_per_node; - int count; - struct list_head *tmp; - struct list_head *nxt; - kptl_peer_t *peer; - - count = 0; - list_for_each_safe (tmp, nxt, peers) { - /* NB I rely on kptllnd_peer_add_peertable_locked to add peers - * in MRU order */ - peer = list_entry(tmp, kptl_peer_t, peer_list); - - if (peer->peer_id.nid != pid.nid) - continue; - - LASSERT (peer->peer_id.pid != pid.pid); - - count++; - - if (count < cull_count) /* recent (don't cull) */ - continue; - - CDEBUG(D_NET, "Cull %s(%s)\n", - libcfs_id2str(peer->peer_id), - kptllnd_ptlid2str(peer->peer_ptlid)); - - kptllnd_peer_close_locked(peer, 0); - } -} - -kptl_peer_t * -kptllnd_peer_allocate (lnet_process_id_t lpid, ptl_process_id_t ppid) -{ - unsigned long flags; - kptl_peer_t *peer; - - LIBCFS_ALLOC(peer, sizeof (*peer)); - if (peer == NULL) { - CERROR("Can't create peer %s (%s)\n", - libcfs_id2str(lpid), - kptllnd_ptlid2str(ppid)); - return NULL; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - INIT_LIST_HEAD (&peer->peer_noops); - INIT_LIST_HEAD (&peer->peer_sendq); - INIT_LIST_HEAD (&peer->peer_activeq); - spin_lock_init (&peer->peer_lock); - - peer->peer_state = PEER_STATE_ALLOCATED; - peer->peer_error = 0; - peer->peer_last_alive = cfs_time_current(); - peer->peer_id = lpid; - peer->peer_ptlid = ppid; - peer->peer_credits = 1; /* enough for HELLO */ - peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS; - peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peercredits - 1; - 
peer->peer_sent_credits = 1; /* HELLO credit is implicit */ - peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */ - - atomic_set(&peer->peer_refcount, 1); /* 1 ref for caller */ - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - peer->peer_myincarnation = kptllnd_data.kptl_incarnation; - - /* Only increase # peers under lock, to guarantee we dont grow it - * during shutdown */ - if (kptllnd_data.kptl_shutdown) { - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - LIBCFS_FREE(peer, sizeof(*peer)); - return NULL; - } - - kptllnd_data.kptl_npeers++; - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return peer; -} - -void -kptllnd_peer_destroy (kptl_peer_t *peer) -{ - unsigned long flags; - - CDEBUG(D_NET, "Peer=%p\n", peer); - - LASSERT (!in_interrupt()); - LASSERT (atomic_read(&peer->peer_refcount) == 0); - LASSERT (peer->peer_state == PEER_STATE_ALLOCATED || - peer->peer_state == PEER_STATE_ZOMBIE); - LASSERT (list_empty(&peer->peer_noops)); - LASSERT (list_empty(&peer->peer_sendq)); - LASSERT (list_empty(&peer->peer_activeq)); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (peer->peer_state == PEER_STATE_ZOMBIE) - list_del(&peer->peer_list); - - kptllnd_data.kptl_npeers--; - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - LIBCFS_FREE (peer, sizeof (*peer)); -} - -void -kptllnd_cancel_txlist (struct list_head *peerq, struct list_head *txs) -{ - struct list_head *tmp; - struct list_head *nxt; - kptl_tx_t *tx; - - list_for_each_safe (tmp, nxt, peerq) { - tx = list_entry(tmp, kptl_tx_t, tx_list); - - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, txs); - - tx->tx_status = -EIO; - tx->tx_active = 0; - } -} - -void -kptllnd_peer_cancel_txs(kptl_peer_t *peer, struct list_head *txs) -{ - unsigned long flags; - - spin_lock_irqsave(&peer->peer_lock, flags); - - kptllnd_cancel_txlist(&peer->peer_noops, txs); - 
kptllnd_cancel_txlist(&peer->peer_sendq, txs); - kptllnd_cancel_txlist(&peer->peer_activeq, txs); - - spin_unlock_irqrestore(&peer->peer_lock, flags); -} - -void -kptllnd_peer_alive (kptl_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->peer_last_alive = cfs_time_current(); - mb(); -} - -void -kptllnd_peer_notify (kptl_peer_t *peer) -{ - unsigned long flags; - time_t last_alive = 0; - int error = 0; - - spin_lock_irqsave(&peer->peer_lock, flags); - - if (peer->peer_error != 0) { - error = peer->peer_error; - peer->peer_error = 0; - - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->peer_last_alive); - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (error != 0) - lnet_notify (kptllnd_data.kptl_ni, peer->peer_id.nid, 0, - last_alive); -} - -void -kptllnd_handle_closing_peers () -{ - unsigned long flags; - struct list_head txs; - kptl_peer_t *peer; - struct list_head *tmp; - struct list_head *nxt; - kptl_tx_t *tx; - int idle; - - /* Check with a read lock first to avoid blocking anyone */ - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - idle = list_empty(&kptllnd_data.kptl_closing_peers) && - list_empty(&kptllnd_data.kptl_zombie_peers); - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (idle) - return; - - INIT_LIST_HEAD(&txs); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - /* Cancel txs on all zombie peers. NB anyone dropping the last peer - * ref removes it from this list, so I musn't drop the lock while - * scanning it. */ - list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT (peer->peer_state == PEER_STATE_ZOMBIE); - - kptllnd_peer_cancel_txs(peer, &txs); - } - - /* Notify LNET and cancel txs on closing (i.e. newly closed) peers. NB - * I'm the only one removing from this list, but peers can be added on - * the end any time I drop the lock. 
*/ - - list_for_each_safe (tmp, nxt, &kptllnd_data.kptl_closing_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT (peer->peer_state == PEER_STATE_CLOSING); - - list_del(&peer->peer_list); - list_add_tail(&peer->peer_list, - &kptllnd_data.kptl_zombie_peers); - peer->peer_state = PEER_STATE_ZOMBIE; - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - kptllnd_peer_notify(peer); - kptllnd_peer_cancel_txs(peer, &txs); - kptllnd_peer_decref(peer); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - } - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - /* Drop peer's ref on all cancelled txs. This will get - * kptllnd_tx_fini() to abort outstanding comms if necessary. */ - - list_for_each_safe (tmp, nxt, &txs) { - tx = list_entry(tmp, kptl_tx_t, tx_list); - list_del(&tx->tx_list); - kptllnd_tx_decref(tx); - } -} - -void -kptllnd_peer_close_locked(kptl_peer_t *peer, int why) -{ - switch (peer->peer_state) { - default: - LBUG(); - - case PEER_STATE_WAITING_HELLO: - case PEER_STATE_ACTIVE: - /* Ensure new peers see a new incarnation of me */ - LASSERT(peer->peer_myincarnation <= kptllnd_data.kptl_incarnation); - if (peer->peer_myincarnation == kptllnd_data.kptl_incarnation) - kptllnd_data.kptl_incarnation++; - - /* Removing from peer table */ - kptllnd_data.kptl_n_active_peers--; - LASSERT (kptllnd_data.kptl_n_active_peers >= 0); - - list_del(&peer->peer_list); - kptllnd_peer_unreserve_buffers(); - - peer->peer_error = why; /* stash 'why' only on first close */ - peer->peer_state = PEER_STATE_CLOSING; - - /* Schedule for immediate attention, taking peer table's ref */ - list_add_tail(&peer->peer_list, - &kptllnd_data.kptl_closing_peers); - wake_up(&kptllnd_data.kptl_watchdog_waitq); - break; - - case PEER_STATE_ZOMBIE: - case PEER_STATE_CLOSING: - break; - } -} - -void -kptllnd_peer_close(kptl_peer_t *peer, int why) -{ - unsigned long flags; - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, 
flags); - kptllnd_peer_close_locked(peer, why); - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); -} - -int -kptllnd_peer_del(lnet_process_id_t id) -{ - struct list_head *ptmp; - struct list_head *pnxt; - kptl_peer_t *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - /* - * Find the single bucket we are supposed to look at or if nid is a - * wildcard (LNET_NID_ANY) then look at all of the buckets - */ - if (id.nid != LNET_NID_ANY) { - struct list_head *l = kptllnd_nid2peerlist(id.nid); - - lo = hi = l - kptllnd_data.kptl_peers; - } else { - if (id.pid != LNET_PID_ANY) - return -EINVAL; - - lo = 0; - hi = kptllnd_data.kptl_peer_hash_size - 1; - } - -again: - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kptllnd_data.kptl_peers[i]) { - peer = list_entry (ptmp, kptl_peer_t, peer_list); - - if (!(id.nid == LNET_NID_ANY || - (peer->peer_id.nid == id.nid && - (id.pid == LNET_PID_ANY || - peer->peer_id.pid == id.pid)))) - continue; - - kptllnd_peer_addref(peer); /* 1 ref for me... 
*/ - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - kptllnd_peer_close(peer, 0); - kptllnd_peer_decref(peer); /* ...until here */ - - rc = 0; /* matched something */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return (rc); -} - -void -kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag) -{ - /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */ - ptl_handle_md_t msg_mdh; - ptl_md_t md; - ptl_err_t prc; - unsigned long flags; - - LASSERT (!tx->tx_idle); - LASSERT (!tx->tx_active); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (tx->tx_type == TX_TYPE_SMALL_MESSAGE || - tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST); - - kptllnd_set_tx_peer(tx, peer); - - memset(&md, 0, sizeof(md)); - - md.threshold = tx->tx_acked ? 2 : 1; /* SEND END + ACK? 
*/ - md.options = PTL_MD_OP_PUT | - PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE; - md.user_ptr = &tx->tx_msg_eventarg; - md.eq_handle = kptllnd_data.kptl_eqh; - - if (nfrag == 0) { - md.start = tx->tx_msg; - md.length = tx->tx_msg->ptlm_nob; - } else { - LASSERT (nfrag > 1); - LASSERT (tx->tx_frags->iov[0].iov_base == (void *)tx->tx_msg); - - md.start = tx->tx_frags; - md.length = nfrag; - md.options |= PTL_MD_IOVEC; - } - - prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh); - if (prc != PTL_OK) { - CERROR("PtlMDBind(%s) failed: %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(prc), prc); - tx->tx_status = -EIO; - kptllnd_tx_decref(tx); - return; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ); - tx->tx_active = 1; - tx->tx_msg_mdh = msg_mdh; - - /* Ensure HELLO is sent first */ - if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_NOOP) - list_add(&tx->tx_list, &peer->peer_noops); - else if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) - list_add(&tx->tx_list, &peer->peer_sendq); - else - list_add_tail(&tx->tx_list, &peer->peer_sendq); - - spin_unlock_irqrestore(&peer->peer_lock, flags); -} - -static inline int -kptllnd_peer_send_noop (kptl_peer_t *peer) -{ - if (!peer->peer_sent_hello || - peer->peer_credits == 0 || - !list_empty(&peer->peer_noops) || - peer->peer_outstanding_credits < PTLLND_CREDIT_HIGHWATER) - return 0; - - /* No tx to piggyback NOOP onto or no credit to send a tx */ - return (list_empty(&peer->peer_sendq) || peer->peer_credits == 1); -} - -void -kptllnd_peer_check_sends (kptl_peer_t *peer) -{ - ptl_handle_me_t meh; - kptl_tx_t *tx; - int rc; - int msg_type; - unsigned long flags; - - LASSERT(!in_interrupt()); - - spin_lock_irqsave(&peer->peer_lock, flags); - - peer->peer_retry_noop = 0; - - if (kptllnd_peer_send_noop(peer)) { - /* post a NOOP to return credits */ - spin_unlock_irqrestore(&peer->peer_lock, flags); - - tx = 
kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (tx == NULL) { - CERROR("Can't return credits to %s: can't allocate descriptor\n", - libcfs_id2str(peer->peer_id)); - } else { - kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_NOOP, 0); - kptllnd_post_tx(peer, tx, 0); - } - - spin_lock_irqsave(&peer->peer_lock, flags); - peer->peer_retry_noop = (tx == NULL); - } - - for (;;) { - if (!list_empty(&peer->peer_noops)) { - LASSERT (peer->peer_sent_hello); - tx = list_entry(peer->peer_noops.next, - kptl_tx_t, tx_list); - } else if (!list_empty(&peer->peer_sendq)) { - tx = list_entry(peer->peer_sendq.next, - kptl_tx_t, tx_list); - } else { - /* nothing to send right now */ - break; - } - - LASSERT (tx->tx_active); - LASSERT (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - - LASSERT (peer->peer_outstanding_credits >= 0); - LASSERT (peer->peer_sent_credits >= 0); - LASSERT (peer->peer_sent_credits + - peer->peer_outstanding_credits <= - *kptllnd_tunables.kptl_peercredits); - LASSERT (peer->peer_credits >= 0); - - msg_type = tx->tx_msg->ptlm_type; - - /* Ensure HELLO is sent first */ - if (!peer->peer_sent_hello) { - LASSERT (list_empty(&peer->peer_noops)); - if (msg_type != PTLLND_MSG_TYPE_HELLO) - break; - peer->peer_sent_hello = 1; - } - - if (peer->peer_credits == 0) { - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: no credits for %s[%p]\n", - libcfs_id2str(peer->peer_id), - peer->peer_credits, - peer->peer_outstanding_credits, - peer->peer_sent_credits, - kptllnd_msgtype2str(msg_type), tx); - break; - } - - /* Last/Initial credit reserved for NOOP/HELLO */ - if (peer->peer_credits == 1 && - msg_type != PTLLND_MSG_TYPE_HELLO && - msg_type != PTLLND_MSG_TYPE_NOOP) { - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: " - "not using last credit for %s[%p]\n", - libcfs_id2str(peer->peer_id), - peer->peer_credits, - peer->peer_outstanding_credits, - peer->peer_sent_credits, - kptllnd_msgtype2str(msg_type), tx); - break; - } - - 
list_del(&tx->tx_list); - - /* Discard any NOOP I queued if I'm not at the high-water mark - * any more or more messages have been queued */ - if (msg_type == PTLLND_MSG_TYPE_NOOP && - !kptllnd_peer_send_noop(peer)) { - tx->tx_active = 0; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_id2str(peer->peer_id)); - kptllnd_tx_decref(tx); - - spin_lock_irqsave(&peer->peer_lock, flags); - continue; - } - - /* fill last-minute msg fields */ - kptllnd_msg_pack(tx->tx_msg, peer); - - if (tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST) { - /* peer_next_matchbits must be known good */ - LASSERT (peer->peer_state >= PEER_STATE_ACTIVE); - /* Assume 64-bit matchbits can't wrap */ - LASSERT (peer->peer_next_matchbits >= PTL_RESERVED_MATCHBITS); - tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits = - peer->peer_next_matchbits++; - } - - peer->peer_sent_credits += peer->peer_outstanding_credits; - peer->peer_outstanding_credits = 0; - peer->peer_credits--; - - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s tx=%p nob=%d cred=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - kptllnd_msgtype2str(msg_type), tx, tx->tx_msg->ptlm_nob, - tx->tx_msg->ptlm_credits); - - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - kptllnd_tx_addref(tx); /* 1 ref for me... 
*/ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST) { - /* Post bulk now we have safe matchbits */ - rc = PtlMEAttach(kptllnd_data.kptl_nih, - *kptllnd_tunables.kptl_portal, - peer->peer_ptlid, - tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits, - 0, /* ignore bits */ - PTL_UNLINK, - PTL_INS_BEFORE, - &meh); - if (rc != PTL_OK) { - CERROR("PtlMEAttach(%s) failed: %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(rc), rc); - goto failed; - } - - rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK, - &tx->tx_rdma_mdh); - if (rc != PTL_OK) { - CERROR("PtlMDAttach(%s) failed: %s(%d)\n", - libcfs_id2str(tx->tx_peer->peer_id), - kptllnd_errtype2str(rc), rc); - rc = PtlMEUnlink(meh); - LASSERT(rc == PTL_OK); - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - goto failed; - } - /* I'm not racing with the event callback here. It's a - * bug if there's an event on the MD I just attached - * before I actually send the RDMA request message - - * probably matchbits re-used in error. */ - } - - tx->tx_tposted = jiffies; /* going on the wire */ - - rc = PtlPut (tx->tx_msg_mdh, - tx->tx_acked ? 
PTL_ACK_REQ : PTL_NOACK_REQ, - peer->peer_ptlid, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - LNET_MSG_MATCHBITS, - 0, /* offset */ - 0); /* header data */ - if (rc != PTL_OK) { - CERROR("PtlPut %s error %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(rc), rc); - goto failed; - } - - kptllnd_tx_decref(tx); /* drop my ref */ - - spin_lock_irqsave(&peer->peer_lock, flags); - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); - return; - - failed: - /* Nuke everything (including tx we were trying) */ - kptllnd_peer_close(peer, -EIO); - kptllnd_tx_decref(tx); -} - -kptl_tx_t * -kptllnd_find_timed_out_tx(kptl_peer_t *peer) -{ - kptl_tx_t *tx; - struct list_head *ele; - - list_for_each(ele, &peer->peer_sendq) { - tx = list_entry(ele, kptl_tx_t, tx_list); - - if (time_after_eq(jiffies, tx->tx_deadline)) { - kptllnd_tx_addref(tx); - return tx; - } - } - - list_for_each(ele, &peer->peer_activeq) { - tx = list_entry(ele, kptl_tx_t, tx_list); - - if (time_after_eq(jiffies, tx->tx_deadline)) { - kptllnd_tx_addref(tx); - return tx; - } - } - - return NULL; -} - - -void -kptllnd_peer_check_bucket (int idx, int stamp) -{ - struct list_head *peers = &kptllnd_data.kptl_peers[idx]; - struct list_head *ptmp; - kptl_peer_t *peer; - kptl_tx_t *tx; - unsigned long flags; - int nsend; - int nactive; - int check_sends; - - CDEBUG(D_NET, "Bucket=%d, stamp=%d\n", idx, stamp); - - again: - /* NB. 
Shared lock while I just look */ - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kptl_peer_t, peer_list); - - CDEBUG(D_NET, "Peer=%s Credits=%d Outstanding=%d Send=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits); - - spin_lock(&peer->peer_lock); - - if (peer->peer_check_stamp == stamp) { - /* checked already this pass */ - spin_unlock(&peer->peer_lock); - continue; - } - - peer->peer_check_stamp = stamp; - tx = kptllnd_find_timed_out_tx(peer); - check_sends = peer->peer_retry_noop; - - spin_unlock(&peer->peer_lock); - - if (tx == NULL && !check_sends) - continue; - - kptllnd_peer_addref(peer); /* 1 ref for me... */ - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (tx == NULL) { /* nothing timed out */ - kptllnd_peer_check_sends(peer); - kptllnd_peer_decref(peer); /* ...until here or... */ - - /* rescan after dropping the lock */ - goto again; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - nsend = kptllnd_count_queue(&peer->peer_sendq); - nactive = kptllnd_count_queue(&peer->peer_activeq); - spin_unlock_irqrestore(&peer->peer_lock, flags); - - LCONSOLE_ERROR_MSG(0x126, "Timing out %s: %s\n", - libcfs_id2str(peer->peer_id), - (tx->tx_tposted == 0) ? 
- "no free peer buffers" : - "please check Portals"); - - if (tx->tx_tposted) { - CERROR("Could not send to %s after %ds (sent %lds ago); " - "check Portals for possible issues\n", - libcfs_id2str(peer->peer_id), - *kptllnd_tunables.kptl_timeout, - cfs_duration_sec(jiffies - tx->tx_tposted)); - } else { - CERROR("Could not get credits for %s after %ds; " - "possible Lustre networking issues\n", - libcfs_id2str(peer->peer_id), - *kptllnd_tunables.kptl_timeout); - } - - CERROR("%s timed out: cred %d outstanding %d, sent %d, " - "sendq %d, activeq %d Tx %p %s (%s%s%s) status %d " - "%sposted %lu T/O %ds\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - nsend, nactive, tx, kptllnd_tx_typestr(tx->tx_type), - tx->tx_active ? "A" : "", - PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ? - "" : "M", - PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ? - "" : "D", - tx->tx_status, - (tx->tx_tposted == 0) ? "not " : "", - (tx->tx_tposted == 0) ? 
0UL : (jiffies - tx->tx_tposted), - *kptllnd_tunables.kptl_timeout); - - kptllnd_dump_ptltrace(); - - kptllnd_tx_decref(tx); - - kptllnd_peer_close(peer, -ETIMEDOUT); - kptllnd_peer_decref(peer); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); -} - -kptl_peer_t * -kptllnd_id2peer_locked (lnet_process_id_t id) -{ - struct list_head *peers = kptllnd_nid2peerlist(id.nid); - struct list_head *tmp; - kptl_peer_t *peer; - - list_for_each (tmp, peers) { - - peer = list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT(peer->peer_state == PEER_STATE_WAITING_HELLO || - peer->peer_state == PEER_STATE_ACTIVE); - - if (peer->peer_id.nid != id.nid || - peer->peer_id.pid != id.pid) - continue; - - kptllnd_peer_addref(peer); - - CDEBUG(D_NET, "%s -> %s (%d)\n", - libcfs_id2str(id), - kptllnd_ptlid2str(peer->peer_ptlid), - atomic_read (&peer->peer_refcount)); - return peer; - } - - return NULL; -} - -void -kptllnd_peertable_overflow_msg(char *str, lnet_process_id_t id) -{ - LCONSOLE_ERROR_MSG(0x127, "%s %s overflows the peer table[%d]: " - "messages may be dropped\n", - str, libcfs_id2str(id), - kptllnd_data.kptl_n_active_peers); - LCONSOLE_ERROR_MSG(0x128, "Please correct by increasing " - "'max_nodes' or 'max_procs_per_node'\n"); -} - -__u64 -kptllnd_get_last_seen_matchbits_locked(lnet_process_id_t lpid) -{ - kptl_peer_t *peer; - struct list_head *tmp; - - /* Find the last matchbits I saw this new peer using. Note.. - A. This peer cannot be in the peer table - she's new! - B. If I can't find the peer in the closing/zombie peers, all - matchbits are safe because all refs to the (old) peer have gone - so all txs have completed so there's no risk of matchbit - collision! 
- */ - - LASSERT(kptllnd_id2peer_locked(lpid) == NULL); - - /* peer's last matchbits can't change after it comes out of the peer - * table, so first match is fine */ - - list_for_each (tmp, &kptllnd_data.kptl_closing_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - if (peer->peer_id.nid == lpid.nid && - peer->peer_id.pid == lpid.pid) - return peer->peer_last_matchbits_seen; - } - - list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - if (peer->peer_id.nid == lpid.nid && - peer->peer_id.pid == lpid.pid) - return peer->peer_last_matchbits_seen; - } - - return PTL_RESERVED_MATCHBITS; -} - -kptl_peer_t * -kptllnd_peer_handle_hello (ptl_process_id_t initiator, - kptl_msg_t *msg) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - kptl_peer_t *peer; - kptl_peer_t *new_peer; - lnet_process_id_t lpid; - unsigned long flags; - kptl_tx_t *hello_tx; - int rc; - __u64 safe_matchbits; - __u64 last_matchbits_seen; - - lpid.nid = msg->ptlm_srcnid; - lpid.pid = msg->ptlm_srcpid; - - CDEBUG(D_NET, "hello from %s(%s)\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); - - if (initiator.pid != kptllnd_data.kptl_portals_id.pid && - (msg->ptlm_srcpid & LNET_PID_USERFLAG) == 0) { - /* If the peer's PID isn't _the_ ptllnd kernel pid, she must be - * userspace. Refuse the connection if she hasn't set the - * correct flag in her PID... */ - CERROR("Userflag not set in hello from %s (%s)\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); - return NULL; - } - - /* kptlhm_matchbits are the highest matchbits my peer may have used to - * RDMA to me. 
I ensure I never register buffers for RDMA that could - * match any she used */ - safe_matchbits = msg->ptlm_u.hello.kptlhm_matchbits + 1; - - if (safe_matchbits < PTL_RESERVED_MATCHBITS) { - CERROR("Illegal matchbits "LPX64" in HELLO from %s\n", - safe_matchbits, libcfs_id2str(lpid)); - return NULL; - } - - if (msg->ptlm_u.hello.kptlhm_max_msg_size < PTLLND_MIN_BUFFER_SIZE) { - CERROR("%s: max message size %d < MIN %d", - libcfs_id2str(lpid), - msg->ptlm_u.hello.kptlhm_max_msg_size, - PTLLND_MIN_BUFFER_SIZE); - return NULL; - } - - if (msg->ptlm_credits <= 1) { - CERROR("Need more than 1+%d credits from %s\n", - msg->ptlm_credits, libcfs_id2str(lpid)); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - - peer = kptllnd_id2peer_locked(lpid); - if (peer != NULL) { - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - /* Completing HELLO handshake */ - LASSERT(peer->peer_incarnation == 0); - - if (msg->ptlm_dststamp != 0 && - msg->ptlm_dststamp != peer->peer_myincarnation) { - write_unlock_irqrestore(g_lock, flags); - - CERROR("Ignoring HELLO from %s: unexpected " - "dststamp "LPX64" ("LPX64" wanted)\n", - libcfs_id2str(lpid), - msg->ptlm_dststamp, - peer->peer_myincarnation); - kptllnd_peer_decref(peer); - return NULL; - } - - /* Concurrent initiation or response to my HELLO */ - peer->peer_state = PEER_STATE_ACTIVE; - peer->peer_incarnation = msg->ptlm_srcstamp; - peer->peer_next_matchbits = safe_matchbits; - peer->peer_max_msg_size = - msg->ptlm_u.hello.kptlhm_max_msg_size; - - write_unlock_irqrestore(g_lock, flags); - return peer; - } - - if (msg->ptlm_dststamp != 0 && - msg->ptlm_dststamp <= peer->peer_myincarnation) { - write_unlock_irqrestore(g_lock, flags); - - CERROR("Ignoring stale HELLO from %s: " - "dststamp "LPX64" (current "LPX64")\n", - libcfs_id2str(lpid), - msg->ptlm_dststamp, - peer->peer_myincarnation); - kptllnd_peer_decref(peer); - return NULL; - } - - /* Brand new connection attempt: remove old incarnation */ - 
kptllnd_peer_close_locked(peer, 0); - } - - kptllnd_cull_peertable_locked(lpid); - - write_unlock_irqrestore(g_lock, flags); - - if (peer != NULL) { - CDEBUG(D_NET, "Peer %s (%s) reconnecting:" - " stamp "LPX64"("LPX64")\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator), - msg->ptlm_srcstamp, peer->peer_incarnation); - - kptllnd_peer_decref(peer); - } - - hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (hello_tx == NULL) { - CERROR("Unable to allocate HELLO message for %s\n", - libcfs_id2str(lpid)); - return NULL; - } - - kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO, - sizeof(kptl_hello_msg_t)); - - new_peer = kptllnd_peer_allocate(lpid, initiator); - if (new_peer == NULL) { - kptllnd_tx_decref(hello_tx); - return NULL; - } - - rc = kptllnd_peer_reserve_buffers(); - if (rc != 0) { - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - - CERROR("Failed to reserve buffers for %s\n", - libcfs_id2str(lpid)); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - - again: - if (kptllnd_data.kptl_shutdown) { - write_unlock_irqrestore(g_lock, flags); - - CERROR ("Shutdown started, refusing connection from %s\n", - libcfs_id2str(lpid)); - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return NULL; - } - - peer = kptllnd_id2peer_locked(lpid); - if (peer != NULL) { - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - /* An outgoing message instantiated 'peer' for me */ - LASSERT(peer->peer_incarnation == 0); - - peer->peer_state = PEER_STATE_ACTIVE; - peer->peer_incarnation = msg->ptlm_srcstamp; - peer->peer_next_matchbits = safe_matchbits; - peer->peer_max_msg_size = - msg->ptlm_u.hello.kptlhm_max_msg_size; - - write_unlock_irqrestore(g_lock, flags); - - CWARN("Outgoing instantiated peer %s\n", - libcfs_id2str(lpid)); - } else { - LASSERT (peer->peer_state == PEER_STATE_ACTIVE); - - write_unlock_irqrestore(g_lock, flags); - - /* WOW! 
Somehow this peer completed the HELLO - * handshake while I slept. I guess I could have slept - * while it rebooted and sent a new HELLO, so I'll fail - * this one... */ - CWARN("Wow! peer %s\n", libcfs_id2str(lpid)); - kptllnd_peer_decref(peer); - peer = NULL; - } - - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return peer; - } - - if (kptllnd_data.kptl_n_active_peers == - kptllnd_data.kptl_expected_peers) { - /* peer table full */ - write_unlock_irqrestore(g_lock, flags); - - kptllnd_peertable_overflow_msg("Connection from ", lpid); - - rc = kptllnd_reserve_buffers(1); /* HELLO headroom */ - if (rc != 0) { - CERROR("Refusing connection from %s\n", - libcfs_id2str(lpid)); - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - kptllnd_data.kptl_expected_peers++; - goto again; - } - - last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(lpid); - - hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen; - hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size = - *kptllnd_tunables.kptl_max_msg_size; - - new_peer->peer_state = PEER_STATE_ACTIVE; - new_peer->peer_incarnation = msg->ptlm_srcstamp; - new_peer->peer_next_matchbits = safe_matchbits; - new_peer->peer_last_matchbits_seen = last_matchbits_seen; - new_peer->peer_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size; - - kptllnd_peer_add_peertable_locked(new_peer); - - write_unlock_irqrestore(g_lock, flags); - - /* NB someone else could get in now and post a message before I post - * the HELLO, but post_tx/check_sends take care of that! 
*/ - - CDEBUG(D_NETTRACE, "%s: post response hello %p\n", - libcfs_id2str(new_peer->peer_id), hello_tx); - - kptllnd_post_tx(new_peer, hello_tx, 0); - kptllnd_peer_check_sends(new_peer); - - return new_peer; -} - -void -kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag) -{ - kptllnd_post_tx(peer, tx, nfrag); - kptllnd_peer_check_sends(peer); -} - -int -kptllnd_find_target(kptl_peer_t **peerp, lnet_process_id_t target) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - ptl_process_id_t ptl_id; - kptl_peer_t *new_peer; - kptl_tx_t *hello_tx; - unsigned long flags; - int rc; - __u64 last_matchbits_seen; - - /* I expect to find the peer, so I only take a read lock... */ - read_lock_irqsave(g_lock, flags); - *peerp = kptllnd_id2peer_locked(target); - read_unlock_irqrestore(g_lock, flags); - - if (*peerp != NULL) - return 0; - - if ((target.pid & LNET_PID_USERFLAG) != 0) { - CWARN("Refusing to create a new connection to %s " - "(non-kernel peer)\n", libcfs_id2str(target)); - return -EHOSTUNREACH; - } - - /* The new peer is a kernel ptllnd, and kernel ptllnds all have - * the same portals PID */ - ptl_id.nid = kptllnd_lnet2ptlnid(target.nid); - ptl_id.pid = kptllnd_data.kptl_portals_id.pid; - - hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (hello_tx == NULL) { - CERROR("Unable to allocate connect message for %s\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO, - sizeof(kptl_hello_msg_t)); - - new_peer = kptllnd_peer_allocate(target, ptl_id); - if (new_peer == NULL) { - rc = -ENOMEM; - goto unwind_0; - } - - rc = kptllnd_peer_reserve_buffers(); - if (rc != 0) - goto unwind_1; - - write_lock_irqsave(g_lock, flags); - again: - if (kptllnd_data.kptl_shutdown) { - write_unlock_irqrestore(g_lock, flags); - rc = -ESHUTDOWN; - goto unwind_2; - } - - *peerp = kptllnd_id2peer_locked(target); - if (*peerp != NULL) { - write_unlock_irqrestore(g_lock, flags); - goto unwind_2; - } - - 
kptllnd_cull_peertable_locked(target); - - if (kptllnd_data.kptl_n_active_peers == - kptllnd_data.kptl_expected_peers) { - /* peer table full */ - write_unlock_irqrestore(g_lock, flags); - - kptllnd_peertable_overflow_msg("Connection to ", target); - - rc = kptllnd_reserve_buffers(1); /* HELLO headroom */ - if (rc != 0) { - CERROR("Can't create connection to %s\n", - libcfs_id2str(target)); - rc = -ENOMEM; - goto unwind_2; - } - write_lock_irqsave(g_lock, flags); - kptllnd_data.kptl_expected_peers++; - goto again; - } - - last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(target); - - hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen; - hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size = - *kptllnd_tunables.kptl_max_msg_size; - - new_peer->peer_state = PEER_STATE_WAITING_HELLO; - new_peer->peer_last_matchbits_seen = last_matchbits_seen; - - kptllnd_peer_add_peertable_locked(new_peer); - - write_unlock_irqrestore(g_lock, flags); - - /* NB someone else could get in now and post a message before I post - * the HELLO, but post_tx/check_sends take care of that! */ - - CDEBUG(D_NETTRACE, "%s: post initial hello %p\n", - libcfs_id2str(new_peer->peer_id), hello_tx); - - kptllnd_post_tx(new_peer, hello_tx, 0); - kptllnd_peer_check_sends(new_peer); - - *peerp = new_peer; - return 0; - - unwind_2: - kptllnd_peer_unreserve_buffers(); - unwind_1: - kptllnd_peer_decref(new_peer); - unwind_0: - kptllnd_tx_decref(hello_tx); - - return rc; -} diff --git a/lnet/klnds/ptllnd/ptllnd_ptltrace.c b/lnet/klnds/ptllnd/ptllnd_ptltrace.c deleted file mode 100644 index d82682002ca517db270f8031282cba96c0d90992..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_ptltrace.c +++ /dev/null @@ -1,177 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. All rights reserved. 
- * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -#include "ptllnd.h" - -#ifdef CRAY_XT3 -static struct semaphore ptltrace_mutex; -static struct semaphore ptltrace_signal; - -void -kptllnd_ptltrace_to_file(char *filename) -{ - CFS_DECL_JOURNAL_DATA; - CFS_DECL_MMSPACE; - - cfs_file_t *filp; - char *start; - char *tmpbuf; - int len; - int rc; - loff_t offset = 0; - int eof = 0; - - CWARN("dumping ptltrace to %s\n", filename); - - LIBCFS_ALLOC(tmpbuf, PAGE_SIZE); - if (tmpbuf == NULL) { - CERROR("Can't allocate page buffer to dump %s\n", filename); - return; - } - - CFS_PUSH_JOURNAL; - - filp = cfs_filp_open(filename, - O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc); - if (filp == NULL) { - if (rc != -EEXIST) - CERROR("Error %d creating %s\n", rc, filename); - goto out; - } - - CFS_MMSPACE_OPEN; - - while (!eof) { - start = NULL; - len = ptl_proc_read(tmpbuf, &start, offset, - PAGE_SIZE, &eof, NULL); - - /* we don't allow ptl_proc_read to mimic case 0 or 1 behavior - * for a proc_read method, only #2: from proc_file_read - * - * 2) Set *start = an address within the buffer. - * Put the data of the requested offset at *start. - * Return the number of bytes of data placed there. - * If this number is greater than zero and you - * didn't signal eof and the reader is prepared to - * take more data you will be called again with the - * requested offset advanced by the number of bytes - * absorbed. 
- */ - - if (len == 0) /* end of file */ - break; - - if (len < 0) { - CERROR("ptl_proc_read: error %d\n", len); - break; - } - - if (start < tmpbuf || start + len > tmpbuf + PAGE_SIZE) { - CERROR("ptl_proc_read bug: %p for %d not in %p for %ld\n", - start, len, tmpbuf, PAGE_SIZE); - break; - } - - rc = cfs_filp_write(filp, start, len, cfs_filp_poff(filp)); - if (rc != len) { - if (rc < 0) - CERROR("Error %d writing %s\n", rc, filename); - else - CERROR("Partial write %d(%d) to %s\n", - rc, len, filename); - break; - } - - offset += len; - } - - CFS_MMSPACE_CLOSE; - - rc = cfs_filp_fsync(filp); - if (rc != 0) - CERROR("Error %d syncing %s\n", rc, filename); - - cfs_filp_close(filp); -out: - CFS_POP_JOURNAL; - LIBCFS_FREE(tmpbuf, PAGE_SIZE); -} - -int -kptllnd_dump_ptltrace_thread(void *arg) -{ - static char fname[1024]; - - libcfs_daemonize("ptltracedump"); - - /* serialise with other instances of me */ - mutex_down(&ptltrace_mutex); - - snprintf(fname, sizeof(fname), "%s.%ld.%ld", - *kptllnd_tunables.kptl_ptltrace_basename, - cfs_time_current_sec(), (long)arg); - - kptllnd_ptltrace_to_file(fname); - - mutex_up(&ptltrace_mutex); - - /* unblock my creator */ - mutex_up(&ptltrace_signal); - - return 0; -} - -void -kptllnd_dump_ptltrace(void) -{ - int rc; - - if (!*kptllnd_tunables.kptl_ptltrace_on_timeout) - return; - - rc = cfs_kernel_thread(kptllnd_dump_ptltrace_thread, - (void *)(long)cfs_curproc_pid(), - CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { - CERROR("Error %d starting ptltrace dump thread\n", rc); - } else { - /* block until thread completes */ - mutex_down(&ptltrace_signal); - } -} - -void -kptllnd_init_ptltrace(void) -{ - init_mutex(&ptltrace_mutex); - init_mutex_locked(&ptltrace_signal); -} - -#else - -void -kptllnd_dump_ptltrace(void) -{ -} - -void -kptllnd_init_ptltrace(void) -{ -} - -#endif diff --git a/lnet/klnds/ptllnd/ptllnd_rx_buf.c b/lnet/klnds/ptllnd/ptllnd_rx_buf.c deleted file mode 100644 index 
356660c0a2c79a6dd61f4b04ce530f39bd6d4e51..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_rx_buf.c +++ /dev/null @@ -1,739 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - - #include "ptllnd.h" - -void -kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp) -{ - memset(rxbp, 0, sizeof(*rxbp)); - spin_lock_init(&rxbp->rxbp_lock); - INIT_LIST_HEAD(&rxbp->rxbp_list); -} - -void -kptllnd_rx_buffer_destroy(kptl_rx_buffer_t *rxb) -{ - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - - LASSERT(rxb->rxb_refcount == 0); - LASSERT(PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE)); - LASSERT(!rxb->rxb_posted); - LASSERT(rxb->rxb_idle); - - list_del(&rxb->rxb_list); - rxbp->rxbp_count--; - - LIBCFS_FREE(rxb->rxb_buffer, kptllnd_rx_buffer_size()); - LIBCFS_FREE(rxb, sizeof(*rxb)); -} - -int -kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count) -{ - int bufsize; - int msgs_per_buffer; - int rc; - kptl_rx_buffer_t *rxb; - char *buffer; - unsigned long flags; - - bufsize = kptllnd_rx_buffer_size(); - msgs_per_buffer = bufsize / (*kptllnd_tunables.kptl_max_msg_size); - - CDEBUG(D_NET, "kptllnd_rx_buffer_pool_reserve(%d)\n", count); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - for (;;) { - if (rxbp->rxbp_shutdown) { - rc = -ESHUTDOWN; - break; - } - - if 
(rxbp->rxbp_reserved + count <= - rxbp->rxbp_count * msgs_per_buffer) { - rc = 0; - break; - } - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - LIBCFS_ALLOC(rxb, sizeof(*rxb)); - LIBCFS_ALLOC(buffer, bufsize); - - if (rxb == NULL || buffer == NULL) { - CERROR("Failed to allocate rx buffer\n"); - - if (rxb != NULL) - LIBCFS_FREE(rxb, sizeof(*rxb)); - if (buffer != NULL) - LIBCFS_FREE(buffer, bufsize); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rc = -ENOMEM; - break; - } - - memset(rxb, 0, sizeof(*rxb)); - - rxb->rxb_eventarg.eva_type = PTLLND_EVENTARG_TYPE_BUF; - rxb->rxb_refcount = 0; - rxb->rxb_pool = rxbp; - rxb->rxb_idle = 0; - rxb->rxb_posted = 0; - rxb->rxb_buffer = buffer; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - if (rxbp->rxbp_shutdown) { - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - LIBCFS_FREE(rxb, sizeof(*rxb)); - LIBCFS_FREE(buffer, bufsize); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rc = -ESHUTDOWN; - break; - } - - list_add_tail(&rxb->rxb_list, &rxbp->rxbp_list); - rxbp->rxbp_count++; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - kptllnd_rx_buffer_post(rxb); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - } - - if (rc == 0) - rxbp->rxbp_reserved += count; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - return rc; -} - -void -kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, - int count) -{ - unsigned long flags; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - CDEBUG(D_NET, "kptllnd_rx_buffer_pool_unreserve(%d)\n", count); - rxbp->rxbp_reserved -= count; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -void -kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp) -{ - kptl_rx_buffer_t *rxb; - int rc; - int i; - unsigned long flags; - struct list_head *tmp; - struct list_head *nxt; - ptl_handle_md_t mdh; - - /* CAVEAT EMPTOR: I'm racing with everything here!!! 
- * - * Buffers can still be posted after I set rxbp_shutdown because I - * can't hold rxbp_lock while I'm posting them. - * - * Calling PtlMDUnlink() here races with auto-unlinks; i.e. a buffer's - * MD handle could become invalid under me. I am vulnerable to portals - * re-using handles (i.e. make the same handle valid again, but for a - * different MD) from when the MD is actually unlinked, to when the - * event callback tells me it has been unlinked. */ - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - rxbp->rxbp_shutdown = 1; - - for (i = 9;; i++) { - list_for_each_safe(tmp, nxt, &rxbp->rxbp_list) { - rxb = list_entry (tmp, kptl_rx_buffer_t, rxb_list); - - if (rxb->rxb_idle) { - spin_unlock_irqrestore(&rxbp->rxbp_lock, - flags); - kptllnd_rx_buffer_destroy(rxb); - spin_lock_irqsave(&rxbp->rxbp_lock, - flags); - continue; - } - - mdh = rxb->rxb_mdh; - if (PtlHandleIsEqual(mdh, PTL_INVALID_HANDLE)) - continue; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - rc = PtlMDUnlink(mdh); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - /* callback clears rxb_mdh and drops net's ref - * (which causes repost, but since I set - * shutdown, it will just set the buffer - * idle) */ -#else - if (rc == PTL_OK) { - rxb->rxb_posted = 0; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - kptllnd_rx_buffer_decref_locked(rxb); - } -#endif - } - - if (list_empty(&rxbp->rxbp_list)) - break; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - /* Wait a bit for references to be dropped */ - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d Busy RX Buffers\n", - rxbp->rxbp_count); - - cfs_pause(cfs_time_seconds(1)); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - } - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -void -kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb) -{ - int rc; - ptl_md_t md; - ptl_handle_me_t meh; - ptl_handle_md_t mdh; - ptl_process_id_t any; - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (rxb->rxb_refcount == 0); - LASSERT (!rxb->rxb_idle); - LASSERT (!rxb->rxb_posted); - LASSERT (PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE)); - - any.nid = PTL_NID_ANY; - any.pid = PTL_PID_ANY; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - if (rxbp->rxbp_shutdown) { - rxb->rxb_idle = 1; - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - return; - } - - rxb->rxb_refcount = 1; /* net's ref */ - rxb->rxb_posted = 1; /* I'm posting */ - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - rc = PtlMEAttach(kptllnd_data.kptl_nih, - *kptllnd_tunables.kptl_portal, - any, - LNET_MSG_MATCHBITS, - 0, /* all matchbits are valid - ignore none */ - PTL_UNLINK, - PTL_INS_AFTER, - &meh); - if (rc != PTL_OK) { - CERROR("PtlMeAttach rxb failed %s(%d)\n", - kptllnd_errtype2str(rc), rc); - goto failed; - } - - /* - * Setup MD - */ - md.start = rxb->rxb_buffer; - md.length = PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages; - md.threshold = PTL_MD_THRESH_INF; - md.options = PTL_MD_OP_PUT | - PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE | - PTL_MD_MAX_SIZE | - PTL_MD_LOCAL_ALIGN8; - md.user_ptr = &rxb->rxb_eventarg; - md.max_size = *kptllnd_tunables.kptl_max_msg_size; - md.eq_handle = kptllnd_data.kptl_eqh; - - rc = PtlMDAttach(meh, md, PTL_UNLINK, &mdh); - if (rc == PTL_OK) { - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - if (rxb->rxb_posted) /* Not auto-unlinked yet!!! 
*/ - rxb->rxb_mdh = mdh; - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - return; - } - - CERROR("PtlMDAttach rxb failed %s(%d)\n", - kptllnd_errtype2str(rc), rc); - rc = PtlMEUnlink(meh); - LASSERT(rc == PTL_OK); - - failed: - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rxb->rxb_posted = 0; - /* XXX this will just try again immediately */ - kptllnd_rx_buffer_decref_locked(rxb); - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -kptl_rx_t * -kptllnd_rx_alloc(void) -{ - kptl_rx_t* rx; - - if (IS_SIMULATION_ENABLED(FAIL_RX_ALLOC)) { - CERROR ("FAIL_RX_ALLOC SIMULATION triggered\n"); - return NULL; - } - - rx = cfs_mem_cache_alloc(kptllnd_data.kptl_rx_cache, CFS_ALLOC_ATOMIC); - if (rx == NULL) { - CERROR("Failed to allocate rx\n"); - return NULL; - } - - memset(rx, 0, sizeof(*rx)); - return rx; -} - -void -kptllnd_rx_done(kptl_rx_t *rx, int post_credit) -{ - kptl_rx_buffer_t *rxb = rx->rx_rxb; - kptl_peer_t *peer = rx->rx_peer; - unsigned long flags; - - LASSERT (post_credit == PTLLND_POSTRX_NO_CREDIT || - post_credit == PTLLND_POSTRX_PEER_CREDIT); - - CDEBUG(D_NET, "rx=%p rxb %p peer %p\n", rx, rxb, peer); - - if (rxb != NULL) - kptllnd_rx_buffer_decref(rxb); - - if (peer != NULL) { - /* Update credits (after I've decref-ed the buffer) */ - spin_lock_irqsave(&peer->peer_lock, flags); - - if (post_credit == PTLLND_POSTRX_PEER_CREDIT) - peer->peer_outstanding_credits++; - - LASSERT (peer->peer_outstanding_credits + - peer->peer_sent_credits <= - *kptllnd_tunables.kptl_peercredits); - - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - rx); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* I might have to send back credits */ - kptllnd_peer_check_sends(peer); - kptllnd_peer_decref(peer); - } - - cfs_mem_cache_free(kptllnd_data.kptl_rx_cache, rx); -} - -void -kptllnd_rx_buffer_callback (ptl_event_t *ev) -{ - kptl_eventarg_t *eva = 
ev->md.user_ptr; - kptl_rx_buffer_t *rxb = kptllnd_eventarg2obj(eva); - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - kptl_rx_t *rx; - int unlinked; - unsigned long flags; - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - unlinked = ev->unlinked; -#else - unlinked = ev->type == PTL_EVENT_UNLINK; -#endif - - CDEBUG(D_NET, "%s: %s(%d) rxb=%p fail=%s(%d) unlink=%d\n", - kptllnd_ptlid2str(ev->initiator), - kptllnd_evtype2str(ev->type), ev->type, rxb, - kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type, - unlinked); - - LASSERT (!rxb->rxb_idle); - LASSERT (ev->md.start == rxb->rxb_buffer); - LASSERT (ev->offset + ev->mlength <= - PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages); - LASSERT (ev->type == PTL_EVENT_PUT_END || - ev->type == PTL_EVENT_UNLINK); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->match_bits == LNET_MSG_MATCHBITS); - - if (ev->ni_fail_type != PTL_NI_OK) { - CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn", - kptllnd_ptlid2str(ev->initiator), - kptllnd_evtype2str(ev->type), ev->type, rxb, - kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - - } else if (ev->type == PTL_EVENT_PUT_END && - !rxbp->rxbp_shutdown) { - - /* rxbp_shutdown sampled without locking! I only treat it as a - * hint since shutdown can start while rx's are queued on - * kptl_sched_rxq. */ -#if (PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force message alignment - someone sending an - * odd-length message will misalign subsequent messages and - * force the fixup below... 
*/ - if ((ev->mlength & 7) != 0) - CWARN("Message from %s has odd length "LPU64": " - "probable version incompatibility\n", - kptllnd_ptlid2str(ev->initiator), - (__u64)ev->mlength); -#endif - rx = kptllnd_rx_alloc(); - if (rx == NULL) { - CERROR("Message from %s dropped: ENOMEM", - kptllnd_ptlid2str(ev->initiator)); - } else { - if ((ev->offset & 7) == 0) { - kptllnd_rx_buffer_addref(rxb); - rx->rx_rxb = rxb; - rx->rx_nob = ev->mlength; - rx->rx_msg = (kptl_msg_t *) - (rxb->rxb_buffer + ev->offset); - } else { -#if (PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force alignment - copy into - * rx_space (avoiding overflow) to fix */ - int maxlen = *kptllnd_tunables.kptl_max_msg_size; - - rx->rx_rxb = NULL; - rx->rx_nob = MIN(maxlen, ev->mlength); - rx->rx_msg = (kptl_msg_t *)rx->rx_space; - memcpy(rx->rx_msg, rxb->rxb_buffer + ev->offset, - rx->rx_nob); -#else - /* Portals should have forced the alignment */ - LBUG(); -#endif - } - - rx->rx_initiator = ev->initiator; - rx->rx_treceived = jiffies; -#ifdef CRAY_XT3 - rx->rx_uid = ev->uid; -#endif - /* Queue for attention */ - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, - flags); - - list_add_tail(&rx->rx_list, - &kptllnd_data.kptl_sched_rxq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - } - } - - if (unlinked) { - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - rxb->rxb_posted = 0; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - kptllnd_rx_buffer_decref_locked(rxb); - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - } -} - -void -kptllnd_nak (kptl_rx_t *rx) -{ - /* Fire-and-forget a stub message that will let the peer know my - * protocol magic/version and make her drop/refresh any peer state she - * might have with me. 
*/ - ptl_md_t md = { - .start = kptllnd_data.kptl_nak_msg, - .length = kptllnd_data.kptl_nak_msg->ptlm_nob, - .threshold = 1, - .options = 0, - .user_ptr = NULL, - .eq_handle = PTL_EQ_NONE}; - ptl_handle_md_t mdh; - int rc; - - rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh); - if (rc != PTL_OK) { - CWARN("Can't NAK %s: bind failed %s(%d)\n", - kptllnd_ptlid2str(rx->rx_initiator), - kptllnd_errtype2str(rc), rc); - return; - } - - rc = PtlPut(mdh, PTL_NOACK_REQ, rx->rx_initiator, - *kptllnd_tunables.kptl_portal, 0, - LNET_MSG_MATCHBITS, 0, 0); - - if (rc != PTL_OK) - CWARN("Can't NAK %s: put failed %s(%d)\n", - kptllnd_ptlid2str(rx->rx_initiator), - kptllnd_errtype2str(rc), rc); -} - -void -kptllnd_rx_parse(kptl_rx_t *rx) -{ - kptl_msg_t *msg = rx->rx_msg; - int post_credit = PTLLND_POSTRX_PEER_CREDIT; - kptl_peer_t *peer; - int rc; - unsigned long flags; - lnet_process_id_t srcid; - - LASSERT (rx->rx_peer == NULL); - - if ((rx->rx_nob >= 4 && - (msg->ptlm_magic == LNET_PROTO_MAGIC || - msg->ptlm_magic == __swab32(LNET_PROTO_MAGIC))) || - (rx->rx_nob >= 6 && - ((msg->ptlm_magic == PTLLND_MSG_MAGIC && - msg->ptlm_version != PTLLND_MSG_VERSION) || - (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC) && - msg->ptlm_version != __swab16(PTLLND_MSG_VERSION))))) { - /* NAK incompatible versions - * See other LNDs for how to handle this if/when ptllnd begins - * to allow different versions to co-exist */ - CERROR("Bad version: got %04x expected %04x from %s\n", - (__u32)(msg->ptlm_magic == PTLLND_MSG_MAGIC ? 
- msg->ptlm_version : __swab16(msg->ptlm_version)), - PTLLND_MSG_VERSION, kptllnd_ptlid2str(rx->rx_initiator)); - kptllnd_nak(rx); - goto rx_done; - } - - rc = kptllnd_msg_unpack(msg, rx->rx_nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - srcid.nid = msg->ptlm_srcnid; - srcid.pid = msg->ptlm_srcpid; - - CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks (%ld s)\n", - libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type), - msg->ptlm_credits, rx, rx->rx_rxb, - jiffies - rx->rx_treceived, - cfs_duration_sec(jiffies - rx->rx_treceived)); - - if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) { - CERROR("Bad source id %s from %s\n", - libcfs_id2str(srcid), - kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) { - peer = kptllnd_id2peer(srcid); - if (peer == NULL) - goto rx_done; - - CWARN("NAK from %s (%s)\n", - libcfs_id2str(srcid), - kptllnd_ptlid2str(rx->rx_initiator)); - - rc = -EPROTO; - goto failed; - } - - if (msg->ptlm_dstnid != kptllnd_data.kptl_ni->ni_nid || - msg->ptlm_dstpid != the_lnet.ln_pid) { - CERROR("Bad dstid %s (expected %s) from %s\n", - libcfs_id2str((lnet_process_id_t) { - .nid = msg->ptlm_dstnid, - .pid = msg->ptlm_dstpid}), - libcfs_id2str((lnet_process_id_t) { - .nid = kptllnd_data.kptl_ni->ni_nid, - .pid = the_lnet.ln_pid}), - kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) { - peer = kptllnd_peer_handle_hello(rx->rx_initiator, msg); - if (peer == NULL) - goto rx_done; - } else { - peer = kptllnd_id2peer(srcid); - if (peer == NULL) { - CWARN("NAK %s: no connection; peer must reconnect\n", - libcfs_id2str(srcid)); - /* NAK to make the peer reconnect */ - kptllnd_nak(rx); - goto rx_done; - } - - /* Ignore anything apart from HELLO while I'm waiting for it and - * any messages for a previous incarnation of the connection */ - 
if (peer->peer_state == PEER_STATE_WAITING_HELLO || - msg->ptlm_dststamp < peer->peer_myincarnation) { - kptllnd_peer_decref(peer); - goto rx_done; - } - - if (msg->ptlm_srcstamp != peer->peer_incarnation) { - CERROR("%s: Unexpected srcstamp "LPX64" " - "("LPX64" expected)\n", - libcfs_id2str(peer->peer_id), - msg->ptlm_srcstamp, - peer->peer_incarnation); - rc = -EPROTO; - goto failed; - } - - if (msg->ptlm_dststamp != peer->peer_myincarnation) { - CERROR("%s: Unexpected dststamp "LPX64" " - "("LPX64" expected)\n", - libcfs_id2str(peer->peer_id), msg->ptlm_dststamp, - peer->peer_myincarnation); - rc = -EPROTO; - goto failed; - } - } - - LASSERT (msg->ptlm_srcnid == peer->peer_id.nid && - msg->ptlm_srcpid == peer->peer_id.pid); - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* Check peer only sends when I've sent her credits */ - if (peer->peer_sent_credits == 0) { - int c = peer->peer_credits; - int oc = peer->peer_outstanding_credits; - int sc = peer->peer_sent_credits; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - CERROR("%s: buffer overrun [%d/%d+%d]\n", - libcfs_id2str(peer->peer_id), c, sc, oc); - goto failed; - } - peer->peer_sent_credits--; - - /* No check for credit overflow - the peer may post new - * buffers after the startup handshake. 
*/ - peer->peer_credits += msg->ptlm_credits; - - /* This ensures the credit taken by NOOP can be returned */ - if (msg->ptlm_type == PTLLND_MSG_TYPE_NOOP) { - peer->peer_outstanding_credits++; - post_credit = PTLLND_POSTRX_NO_CREDIT; - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* See if something can go out now that credits have come in */ - if (msg->ptlm_credits != 0) - kptllnd_peer_check_sends(peer); - - /* ptllnd-level protocol correct - rx takes my ref on peer and increments - * peer_outstanding_credits when it completes */ - rx->rx_peer = peer; - kptllnd_peer_alive(peer); - - switch (msg->ptlm_type) { - default: - /* already checked by kptllnd_msg_unpack() */ - LBUG(); - - case PTLLND_MSG_TYPE_HELLO: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO\n"); - goto rx_done; - - case PTLLND_MSG_TYPE_NOOP: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP\n"); - goto rx_done; - - case PTLLND_MSG_TYPE_IMMEDIATE: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n"); - rc = lnet_parse(kptllnd_data.kptl_ni, - &msg->ptlm_u.immediate.kptlim_hdr, - msg->ptlm_srcnid, - rx, 0); - if (rc >= 0) /* kptllnd_recv owns 'rx' now */ - return; - goto failed; - - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n", - msg->ptlm_type == PTLLND_MSG_TYPE_PUT ? 
- "PUT" : "GET"); - - /* checked in kptllnd_msg_unpack() */ - LASSERT (msg->ptlm_u.rdma.kptlrm_matchbits >= - PTL_RESERVED_MATCHBITS); - - /* Update last match bits seen */ - spin_lock_irqsave(&peer->peer_lock, flags); - - if (msg->ptlm_u.rdma.kptlrm_matchbits > - rx->rx_peer->peer_last_matchbits_seen) - rx->rx_peer->peer_last_matchbits_seen = - msg->ptlm_u.rdma.kptlrm_matchbits; - - spin_unlock_irqrestore(&rx->rx_peer->peer_lock, flags); - - rc = lnet_parse(kptllnd_data.kptl_ni, - &msg->ptlm_u.rdma.kptlrm_hdr, - msg->ptlm_srcnid, - rx, 1); - if (rc >= 0) /* kptllnd_recv owns 'rx' now */ - return; - goto failed; - } - - failed: - kptllnd_peer_close(peer, rc); - if (rx->rx_peer == NULL) /* drop ref on peer */ - kptllnd_peer_decref(peer); /* unless rx_done will */ - rx_done: - kptllnd_rx_done(rx, post_credit); -} diff --git a/lnet/klnds/ptllnd/ptllnd_tx.c b/lnet/klnds/ptllnd/ptllnd_tx.c deleted file mode 100644 index 814a7d91341694a2e49997515564f2094b2ef044..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_tx.c +++ /dev/null @@ -1,507 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - - #include "ptllnd.h" - -void -kptllnd_free_tx(kptl_tx_t *tx) -{ - if (tx->tx_msg != NULL) - LIBCFS_FREE(tx->tx_msg, sizeof(*tx->tx_msg)); - - if (tx->tx_frags != NULL) - LIBCFS_FREE(tx->tx_frags, sizeof(*tx->tx_frags)); - - LIBCFS_FREE(tx, sizeof(*tx)); - - atomic_dec(&kptllnd_data.kptl_ntx); - - /* Keep the tunable in step for visibility */ - *kptllnd_tunables.kptl_ntx = atomic_read(&kptllnd_data.kptl_ntx); -} - -kptl_tx_t * -kptllnd_alloc_tx(void) -{ - kptl_tx_t *tx; - - LIBCFS_ALLOC(tx, sizeof(*tx)); - if (tx == NULL) { - CERROR("Failed to allocate TX\n"); - return NULL; - } - - atomic_inc(&kptllnd_data.kptl_ntx); - - /* Keep the tunable in step for visibility */ - *kptllnd_tunables.kptl_ntx = atomic_read(&kptllnd_data.kptl_ntx); - - tx->tx_idle = 1; - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - tx->tx_msg_mdh = PTL_INVALID_HANDLE; - tx->tx_rdma_eventarg.eva_type = PTLLND_EVENTARG_TYPE_RDMA; - tx->tx_msg_eventarg.eva_type = PTLLND_EVENTARG_TYPE_MSG; - tx->tx_msg = NULL; - tx->tx_frags = NULL; - - LIBCFS_ALLOC(tx->tx_msg, sizeof(*tx->tx_msg)); - if (tx->tx_msg == NULL) { - CERROR("Failed to allocate TX payload\n"); - goto failed; - } - - LIBCFS_ALLOC(tx->tx_frags, sizeof(*tx->tx_frags)); - if (tx->tx_frags == NULL) { - CERROR("Failed to allocate TX frags\n"); - goto failed; - } - - return tx; - - failed: - kptllnd_free_tx(tx); - return NULL; -} - -int -kptllnd_setup_tx_descs() -{ - int n = *kptllnd_tunables.kptl_ntx; - int i; - - for (i = 0; i < n; i++) { - kptl_tx_t *tx = kptllnd_alloc_tx(); - - if (tx == NULL) - return -ENOMEM; - - spin_lock(&kptllnd_data.kptl_tx_lock); - - list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs); - - spin_unlock(&kptllnd_data.kptl_tx_lock); - } - - return 0; -} - -void -kptllnd_cleanup_tx_descs() -{ - kptl_tx_t *tx; - - /* No locking; single threaded now */ - LASSERT (kptllnd_data.kptl_shutdown == 2); - - while (!list_empty(&kptllnd_data.kptl_idle_txs)) { - tx = list_entry(kptllnd_data.kptl_idle_txs.next, - kptl_tx_t, 
tx_list); - - list_del(&tx->tx_list); - kptllnd_free_tx(tx); - } - - LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0); -} - -kptl_tx_t * -kptllnd_get_idle_tx(enum kptl_tx_type type) -{ - kptl_tx_t *tx = NULL; - - if (IS_SIMULATION_ENABLED(FAIL_TX_PUT_ALLOC) && - type == TX_TYPE_PUT_REQUEST) { - CERROR("FAIL_TX_PUT_ALLOC SIMULATION triggered\n"); - return NULL; - } - - if (IS_SIMULATION_ENABLED(FAIL_TX_GET_ALLOC) && - type == TX_TYPE_GET_REQUEST) { - CERROR ("FAIL_TX_GET_ALLOC SIMULATION triggered\n"); - return NULL; - } - - if (IS_SIMULATION_ENABLED(FAIL_TX)) { - CERROR ("FAIL_TX SIMULATION triggered\n"); - return NULL; - } - - spin_lock(&kptllnd_data.kptl_tx_lock); - - if (list_empty (&kptllnd_data.kptl_idle_txs)) { - spin_unlock(&kptllnd_data.kptl_tx_lock); - - tx = kptllnd_alloc_tx(); - if (tx == NULL) - return NULL; - } else { - tx = list_entry(kptllnd_data.kptl_idle_txs.next, - kptl_tx_t, tx_list); - list_del(&tx->tx_list); - - spin_unlock(&kptllnd_data.kptl_tx_lock); - } - - LASSERT (atomic_read(&tx->tx_refcount)== 0); - LASSERT (tx->tx_idle); - LASSERT (!tx->tx_active); - LASSERT (tx->tx_lnet_msg == NULL); - LASSERT (tx->tx_lnet_replymsg == NULL); - LASSERT (tx->tx_peer == NULL); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - - tx->tx_type = type; - atomic_set(&tx->tx_refcount, 1); - tx->tx_status = 0; - tx->tx_idle = 0; - tx->tx_tposted = 0; - tx->tx_acked = *kptllnd_tunables.kptl_ack_puts; - - CDEBUG(D_NET, "tx=%p\n", tx); - return tx; -} - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS -int -kptllnd_tx_abort_netio(kptl_tx_t *tx) -{ - kptl_peer_t *peer = tx->tx_peer; - ptl_handle_md_t msg_mdh; - ptl_handle_md_t rdma_mdh; - unsigned long flags; - - LASSERT (atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_active); - - spin_lock_irqsave(&peer->peer_lock, flags); - - msg_mdh = tx->tx_msg_mdh; - rdma_mdh = tx->tx_rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, 
PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* Uncompleted comms: there must have been some error and it must be - * propagated to LNET... */ - LASSERT (tx->tx_status != 0 || - (tx->tx_lnet_msg == NULL && - tx->tx_lnet_replymsg == NULL)); - - /* stash the tx on its peer until it completes */ - atomic_set(&tx->tx_refcount, 1); - tx->tx_active = 1; - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* These unlinks will ensure completion events (normal or unlink) will - * happen ASAP */ - - if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) - PtlMDUnlink(msg_mdh); - - if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) - PtlMDUnlink(rdma_mdh); - - return -EAGAIN; -} -#else -int -kptllnd_tx_abort_netio(kptl_tx_t *tx) -{ - ptl_peer_t *peer = tx->tx_peer; - ptl_handle_md_t msg_mdh; - ptl_handle_md_t rdma_mdh; - unsigned long flags; - ptl_err_t prc; - - LASSERT (atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_active); - - spin_lock_irqsave(&peer->peer_lock, flags); - - msg_mdh = tx->tx_msg_mdh; - rdma_mdh = tx->tx_rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* Uncompleted comms: there must have been some error and it must be - * propagated to LNET... 
*/ - LASSERT (tx->tx_status != 0 || - (tx->tx_lnet_msg == NULL && - tx->tx_replymsg == NULL)); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) { - prc = PtlMDUnlink(msg_mdh); - if (prc == PTL_OK) - msg_mdh = PTL_INVALID_HANDLE; - } - - if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - prc = PtlMDUnlink(rdma_mdh); - if (prc == PTL_OK) - rdma_mdh = PTL_INVALID_HANDLE; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* update tx_???_mdh if callback hasn't fired */ - if (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)) - msg_mdh = PTL_INVALID_HANDLE; - else - tx->tx_msg_mdh = msg_mdh; - - if (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)) - rdma_mdh = PTL_INVALID_HANDLE; - else - tx->tx_rdma_mdh = rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* stash the tx on its peer until it completes */ - atomic_set(&tx->tx_refcount, 1); - tx->tx_active = 1; - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - kptllnd_peer_addref(peer); /* extra ref for me... */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* This will get the watchdog thread to try aborting all the peer's - * comms again. NB, this deems it fair that 1 failing tx which can't - * be aborted immediately (i.e. its MDs are still busy) is valid cause - * to nuke everything to the same peer! 
*/ - kptllnd_peer_close(peer, tx->tx_status); - - kptllnd_peer_decref(peer); - - return -EAGAIN; -} -#endif - -void -kptllnd_tx_fini (kptl_tx_t *tx) -{ - lnet_msg_t *replymsg = tx->tx_lnet_replymsg; - lnet_msg_t *msg = tx->tx_lnet_msg; - kptl_peer_t *peer = tx->tx_peer; - int status = tx->tx_status; - int rc; - - LASSERT (!in_interrupt()); - LASSERT (atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_idle); - LASSERT (!tx->tx_active); - - /* TX has completed or failed */ - - if (peer != NULL) { - rc = kptllnd_tx_abort_netio(tx); - if (rc != 0) - return; - } - - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - - tx->tx_lnet_msg = tx->tx_lnet_replymsg = NULL; - tx->tx_peer = NULL; - tx->tx_idle = 1; - - spin_lock(&kptllnd_data.kptl_tx_lock); - list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs); - spin_unlock(&kptllnd_data.kptl_tx_lock); - - /* Must finalize AFTER freeing 'tx' */ - if (msg != NULL) - lnet_finalize(kptllnd_data.kptl_ni, msg, - (replymsg == NULL) ? 
status : 0); - - if (replymsg != NULL) - lnet_finalize(kptllnd_data.kptl_ni, replymsg, status); - - if (peer != NULL) - kptllnd_peer_decref(peer); -} - -const char * -kptllnd_tx_typestr(int type) -{ - switch (type) { - default: - return "<TYPE UNKNOWN>"; - - case TX_TYPE_SMALL_MESSAGE: - return "msg"; - - case TX_TYPE_PUT_REQUEST: - return "put_req"; - - case TX_TYPE_GET_REQUEST: - return "get_req"; - break; - - case TX_TYPE_PUT_RESPONSE: - return "put_rsp"; - break; - - case TX_TYPE_GET_RESPONSE: - return "get_rsp"; - } -} - -void -kptllnd_tx_callback(ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - int ismsg = (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG); - kptl_tx_t *tx = kptllnd_eventarg2obj(eva); - kptl_peer_t *peer = tx->tx_peer; - int ok = (ev->ni_fail_type == PTL_OK); - int unlinked; - unsigned long flags; - - LASSERT (peer != NULL); - LASSERT (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG || - eva->eva_type == PTLLND_EVENTARG_TYPE_RDMA); - LASSERT (!ismsg || !PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (ismsg || !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - unlinked = ev->unlinked; -#else - unlinked = (ev->type == PTL_EVENT_UNLINK); -#endif - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - kptllnd_evtype2str(ev->type), ev->type, - tx, kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - - switch (tx->tx_type) { - default: - LBUG(); - - case TX_TYPE_SMALL_MESSAGE: - LASSERT (ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - (ev->type == PTL_EVENT_ACK && tx->tx_acked)); - break; - - case TX_TYPE_PUT_REQUEST: - LASSERT (ev->type == PTL_EVENT_UNLINK || - (ismsg && ev->type == PTL_EVENT_SEND_END) || - (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) || - (!ismsg && ev->type == 
PTL_EVENT_GET_END)); - break; - - case TX_TYPE_GET_REQUEST: - LASSERT (ev->type == PTL_EVENT_UNLINK || - (ismsg && ev->type == PTL_EVENT_SEND_END) || - (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) || - (!ismsg && ev->type == PTL_EVENT_PUT_END)); - - if (!ismsg && ok && ev->type == PTL_EVENT_PUT_END) { - if (ev->hdr_data == PTLLND_RDMA_OK) { - lnet_set_reply_msg_len( - kptllnd_data.kptl_ni, - tx->tx_lnet_replymsg, - ev->mlength); - } else { - /* no match at peer */ - tx->tx_status = -EIO; - } - } - break; - - case TX_TYPE_PUT_RESPONSE: - LASSERT (!ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - ev->type == PTL_EVENT_REPLY_END); - break; - - case TX_TYPE_GET_RESPONSE: - LASSERT (!ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - (ev->type == PTL_EVENT_ACK && tx->tx_acked)); - break; - } - - if (ok) { - kptllnd_peer_alive(peer); - } else { - CERROR("Portals error to %s: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n", - libcfs_id2str(peer->peer_id), - kptllnd_evtype2str(ev->type), ev->type, - tx, kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - tx->tx_status = -EIO; - kptllnd_peer_close(peer, -EIO); - } - - if (!unlinked) - return; - - spin_lock_irqsave(&peer->peer_lock, flags); - - if (ismsg) - tx->tx_msg_mdh = PTL_INVALID_HANDLE; - else - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - - if (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) || - !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) || - !tx->tx_active) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return; - } - - list_del(&tx->tx_list); - tx->tx_active = 0; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* drop peer's ref, but if it was the last one... */ - if (atomic_dec_and_test(&tx->tx_refcount)) { - /* ...finalize it in thread context! 
*/ - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - list_add_tail(&tx->tx_list, &kptllnd_data.kptl_sched_txq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - } -} diff --git a/lnet/klnds/ptllnd/wirecheck.c b/lnet/klnds/ptllnd/wirecheck.c deleted file mode 100644 index 8111cbb3f9b7e11f404eba6cc79263ac35d3ffd1..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/wirecheck.c +++ /dev/null @@ -1,206 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ -#include <stdio.h> -#include <string.h> -#include <sys/types.h> -#include <sys/wait.h> - -#include <config.h> - -#include <lnet/api-support.h> - -/* This ghastly hack to allows me to include lib-types.h It doesn't affect any - * assertions generated here (but fails-safe if it ever does) */ -typedef struct { - int counter; -} atomic_t; - -#include <lnet/lib-types.h> -#include <lnet/ptllnd_wire.h> - -#ifndef HAVE_STRNLEN -#define strnlen(s, i) strlen(s) -#endif - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#undef STRINGIFY -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" CLASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE((int)offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -system_string (char *cmdline, char *str, int len) -{ - int fds[2]; - int rc; - pid_t pid; - - rc = pipe (fds); - if (rc != 0) - abort (); - - pid = fork (); - if (pid == 0) { - /* child */ - int fd = fileno(stdout); - - rc = dup2(fds[1], fd); - if (rc != fd) - abort(); - - exit(system(cmdline)); - /* notreached */ - } else if ((int)pid < 0) { - abort(); - } else { - FILE *f = fdopen (fds[0], "r"); - - if (f == NULL) - abort(); - - close(fds[1]); - - if (fgets(str, len, f) == NULL) - abort(); - - if (waitpid(pid, &rc, 0) != pid) - abort(); - - if (!WIFEXITED(rc) || - WEXITSTATUS(rc) != 0) - abort(); - - if (strnlen(str, len) == len) - str[len - 1] = 0; - - if 
(str[strlen(str) - 1] == '\n') - str[strlen(str) - 1] = 0; - - fclose(f); - } -} - -int -main (int argc, char **argv) -{ - char unameinfo[80]; - char gccinfo[80]; - - system_string("uname -a", unameinfo, sizeof(unameinfo)); - system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - - printf ("void kptllnd_assert_wire_constants (void)\n" - "{\n" - " /* Wire protocol assertions generated by 'wirecheck'\n" - " * running on %s\n" - " * with %s */\n" - "\n", unameinfo, gccinfo); - - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (PTL_RESERVED_MATCHBITS); - CHECK_DEFINE (LNET_MSG_MATCHBITS); - - CHECK_DEFINE (PTLLND_MSG_MAGIC); - CHECK_DEFINE (PTLLND_MSG_VERSION); - - CHECK_DEFINE (PTLLND_RDMA_OK); - CHECK_DEFINE (PTLLND_RDMA_FAIL); - - CHECK_DEFINE (PTLLND_MSG_TYPE_INVALID); - CHECK_DEFINE (PTLLND_MSG_TYPE_PUT); - CHECK_DEFINE (PTLLND_MSG_TYPE_GET); - CHECK_DEFINE (PTLLND_MSG_TYPE_IMMEDIATE); - CHECK_DEFINE (PTLLND_MSG_TYPE_NOOP); - CHECK_DEFINE (PTLLND_MSG_TYPE_HELLO); - CHECK_DEFINE (PTLLND_MSG_TYPE_NAK); - - CHECK_STRUCT (kptl_msg_t); - CHECK_MEMBER (kptl_msg_t, ptlm_magic); - CHECK_MEMBER (kptl_msg_t, ptlm_version); - CHECK_MEMBER (kptl_msg_t, ptlm_type); - CHECK_MEMBER (kptl_msg_t, ptlm_credits); - CHECK_MEMBER (kptl_msg_t, ptlm_nob); - CHECK_MEMBER (kptl_msg_t, ptlm_cksum); - CHECK_MEMBER (kptl_msg_t, ptlm_srcnid); - CHECK_MEMBER (kptl_msg_t, ptlm_srcstamp); - CHECK_MEMBER (kptl_msg_t, ptlm_dstnid); - CHECK_MEMBER (kptl_msg_t, ptlm_dststamp); - CHECK_MEMBER (kptl_msg_t, ptlm_srcpid); - CHECK_MEMBER (kptl_msg_t, ptlm_dstpid); - CHECK_MEMBER (kptl_msg_t, ptlm_u.immediate); - CHECK_MEMBER (kptl_msg_t, ptlm_u.rdma); - CHECK_MEMBER (kptl_msg_t, ptlm_u.hello); - - CHECK_STRUCT (kptl_immediate_msg_t); - CHECK_MEMBER (kptl_immediate_msg_t, kptlim_hdr); - CHECK_MEMBER (kptl_immediate_msg_t, kptlim_payload[13]); - - CHECK_STRUCT (kptl_rdma_msg_t); - CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_hdr); - CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_matchbits); - - 
CHECK_STRUCT (kptl_hello_msg_t); - CHECK_MEMBER (kptl_hello_msg_t, kptlhm_matchbits); - CHECK_MEMBER (kptl_hello_msg_t, kptlhm_max_msg_size); - - printf ("}\n\n"); - - return (0); -} diff --git a/lnet/klnds/qswlnd/.cvsignore b/lnet/klnds/qswlnd/.cvsignore deleted file mode 100644 index 48b17e932f572d544111618c901df6eec08b9dcc..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.*.cmd -.tmp_versions -.depend diff --git a/lnet/klnds/qswlnd/Makefile.in b/lnet/klnds/qswlnd/Makefile.in deleted file mode 100644 index b623e029799bbe7d787c13c409952ca01b4a4086..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kqswlnd -kqswlnd-objs := qswlnd.o qswlnd_cb.o qswlnd_modparams.o - -EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include - -@INCLUDE_RULES@ diff --git a/lnet/klnds/qswlnd/autoMakefile.am b/lnet/klnds/qswlnd/autoMakefile.am deleted file mode 100644 index 721e86fc621e87e61f76e03a393f570fb3c2bd35..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_QSWLND -modulenet_DATA = kqswlnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kqswlnd-objs:%.o=%.c) qswlnd.h diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c deleted file mode 100644 index ef46fcabf589d687c5b1afc5cc56ab15fc322749..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd.c +++ /dev/null @@ -1,553 +0,0 @@ - /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * Copyright (C) 2002-2004 Cluster File Systems, Inc. 
- * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswlnd.h" - - -lnd_t the_kqswlnd = -{ - .lnd_type = QSWLND, - .lnd_startup = kqswnal_startup, - .lnd_shutdown = kqswnal_shutdown, - .lnd_ctl = kqswnal_ctl, - .lnd_send = kqswnal_send, - .lnd_recv = kqswnal_recv, -}; - -kqswnal_data_t kqswnal_data; - -int -kqswnal_get_tx_desc (struct libcfs_ioctl_data *data) -{ - unsigned long flags; - struct list_head *tmp; - kqswnal_tx_t *ktx; - lnet_hdr_t *hdr; - int index = data->ioc_count; - int rc = -ENOENT; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - list_for_each (tmp, &kqswnal_data.kqn_activetxds) { - if (index-- != 0) - continue; - - ktx = list_entry (tmp, kqswnal_tx_t, ktx_list); - hdr = (lnet_hdr_t *)ktx->ktx_buffer; - - data->ioc_count = le32_to_cpu(hdr->payload_length); - data->ioc_nid = le64_to_cpu(hdr->dest_nid); - data->ioc_u64[0] = ktx->ktx_nid; - data->ioc_u32[0] = le32_to_cpu(hdr->type); - data->ioc_u32[1] = ktx->ktx_launcher; - data->ioc_flags = (list_empty (&ktx->ktx_schedlist) ? 
0 : 1) | - (ktx->ktx_state << 2); - rc = 0; - break; - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - return (rc); -} - -int -kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - - LASSERT (ni == kqswnal_data.kqn_ni); - - switch (cmd) { - case IOC_LIBCFS_GET_TXDESC: - return (kqswnal_get_tx_desc (data)); - - case IOC_LIBCFS_REGISTER_MYNID: - if (data->ioc_nid == ni->ni_nid) - return 0; - - LASSERT (LNET_NIDNET(data->ioc_nid) == LNET_NIDNET(ni->ni_nid)); - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID for %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return 0; - - default: - return (-EINVAL); - } -} - -void -kqswnal_shutdown(lnet_ni_t *ni) -{ - unsigned long flags; - kqswnal_tx_t *ktx; - kqswnal_rx_t *krx; - - CDEBUG (D_NET, "shutdown\n"); - LASSERT (ni->ni_data == &kqswnal_data); - LASSERT (ni == kqswnal_data.kqn_ni); - - switch (kqswnal_data.kqn_init) - { - default: - LASSERT (0); - - case KQN_INIT_ALL: - case KQN_INIT_DATA: - break; - } - - /**********************************************************************/ - /* Signal the start of shutdown... */ - spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags); - kqswnal_data.kqn_shuttingdown = 1; - spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags); - - /**********************************************************************/ - /* wait for sends that have allocated a tx desc to launch or give up */ - while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) { - CDEBUG(D_NET, "waiting for %d pending sends\n", - atomic_read (&kqswnal_data.kqn_pending_txs)); - cfs_pause(cfs_time_seconds(1)); - } - - /**********************************************************************/ - /* close elan comms */ - /* Shut down receivers first; rx callbacks might try sending... 
*/ - if (kqswnal_data.kqn_eprx_small != NULL) - ep_free_rcvr (kqswnal_data.kqn_eprx_small); - - if (kqswnal_data.kqn_eprx_large != NULL) - ep_free_rcvr (kqswnal_data.kqn_eprx_large); - - /* NB ep_free_rcvr() returns only after we've freed off all receive - * buffers (see shutdown handling in kqswnal_requeue_rx()). This - * means we must have completed any messages we passed to - * lnet_parse() */ - - if (kqswnal_data.kqn_eptx != NULL) - ep_free_xmtr (kqswnal_data.kqn_eptx); - - /* NB ep_free_xmtr() returns only after all outstanding transmits - * have called their callback... */ - LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); - - /**********************************************************************/ - /* flag threads to terminate, wake them and wait for them to die */ - kqswnal_data.kqn_shuttingdown = 2; - wake_up_all (&kqswnal_data.kqn_sched_waitq); - - while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) { - CDEBUG(D_NET, "waiting for %d threads to terminate\n", - atomic_read (&kqswnal_data.kqn_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - /**********************************************************************/ - /* No more threads. No more portals, router or comms callbacks! - * I control the horizontals and the verticals... - */ - - LASSERT (list_empty (&kqswnal_data.kqn_readyrxds)); - LASSERT (list_empty (&kqswnal_data.kqn_donetxds)); - LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds)); - - /**********************************************************************/ - /* Unmap message buffers and free all descriptors and buffers - */ - - /* FTTB, we need to unmap any remaining mapped memory. When - * ep_dvma_release() get fixed (and releases any mappings in the - * region), we can delete all the code from here --------> */ - - for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx = ktx->ktx_alloclist) { - /* If ktx has a buffer, it got mapped; unmap now. 
NB only - * the pre-mapped stuff is still mapped since all tx descs - * must be idle */ - - if (ktx->ktx_buffer != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_tx_nmh, - &ktx->ktx_ebuffer); - } - - for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) { - /* If krx_kiov[0].kiov_page got allocated, it got mapped. - * NB subsequent pages get merged */ - - if (krx->krx_kiov[0].kiov_page != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_rx_nmh, - &krx->krx_elanbuffer); - } - /* <----------- to here */ - - if (kqswnal_data.kqn_ep_rx_nmh != NULL) - ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh); - - if (kqswnal_data.kqn_ep_tx_nmh != NULL) - ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh); - - while (kqswnal_data.kqn_txds != NULL) { - ktx = kqswnal_data.kqn_txds; - - if (ktx->ktx_buffer != NULL) - LIBCFS_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); - - kqswnal_data.kqn_txds = ktx->ktx_alloclist; - LIBCFS_FREE(ktx, sizeof(*ktx)); - } - - while (kqswnal_data.kqn_rxds != NULL) { - int i; - - krx = kqswnal_data.kqn_rxds; - for (i = 0; i < krx->krx_npages; i++) - if (krx->krx_kiov[i].kiov_page != NULL) - __free_page (krx->krx_kiov[i].kiov_page); - - kqswnal_data.kqn_rxds = krx->krx_alloclist; - LIBCFS_FREE(krx, sizeof (*krx)); - } - - /* resets flags, pointers to NULL etc */ - memset(&kqswnal_data, 0, sizeof (kqswnal_data)); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&libcfs_kmemory)); - - PORTAL_MODULE_UNUSE; -} - -int -kqswnal_startup (lnet_ni_t *ni) -{ - EP_RAILMASK all_rails = EP_RAILMASK_ALL; - int rc; - int i; - kqswnal_rx_t *krx; - kqswnal_tx_t *ktx; - int elan_page_idx; - - LASSERT (ni->ni_lnd == &the_kqswlnd); - -#if KQSW_CKSUM - if (the_lnet.ln_ptlcompat != 0) { - CERROR("Checksumming version not portals compatible\n"); - return -ENODEV; - } -#endif - /* Only 1 instance supported */ - if (kqswnal_data.kqn_init != KQN_INIT_NOTHING) { - CERROR ("Only 1 instance 
supported\n"); - return -EPERM; - } - - if (ni->ni_interfaces[0] != NULL) { - CERROR("Explicit interface config not supported\n"); - return -EPERM; - } - - if (*kqswnal_tunables.kqn_credits >= - *kqswnal_tunables.kqn_ntxmsgs) { - LCONSOLE_ERROR_MSG(0x12e, "Configuration error: please set " - "ntxmsgs(%d) > credits(%d)\n", - *kqswnal_tunables.kqn_ntxmsgs, - *kqswnal_tunables.kqn_credits); - } - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&libcfs_kmemory)); - - /* ensure all pointers NULL etc */ - memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - - kqswnal_data.kqn_ni = ni; - ni->ni_data = &kqswnal_data; - ni->ni_peertxcredits = *kqswnal_tunables.kqn_peercredits; - ni->ni_maxtxcredits = *kqswnal_tunables.kqn_credits; - - INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds); - spin_lock_init (&kqswnal_data.kqn_idletxd_lock); - - INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_donetxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds); - - spin_lock_init (&kqswnal_data.kqn_sched_lock); - init_waitqueue_head (&kqswnal_data.kqn_sched_waitq); - - /* pointers/lists/locks initialised */ - kqswnal_data.kqn_init = KQN_INIT_DATA; - PORTAL_MODULE_USE; - - kqswnal_data.kqn_ep = ep_system(); - if (kqswnal_data.kqn_ep == NULL) { - CERROR("Can't initialise EKC\n"); - kqswnal_shutdown(ni); - return (-ENODEV); - } - - if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) { - CERROR("Can't get elan ID\n"); - kqswnal_shutdown(ni); - return (-ENODEV); - } - - kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_ep); - kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_ep); - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), kqswnal_data.kqn_elanid); - - /**********************************************************************/ - /* Get the transmitter */ - - kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep); - if (kqswnal_data.kqn_eptx == NULL) - { - CERROR ("Can't allocate 
transmitter\n"); - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Get the receivers */ - - kqswnal_data.kqn_eprx_small = - ep_alloc_rcvr (kqswnal_data.kqn_ep, - EP_MSG_SVC_PORTALS_SMALL, - *kqswnal_tunables.kqn_ep_envelopes_small); - if (kqswnal_data.kqn_eprx_small == NULL) - { - CERROR ("Can't install small msg receiver\n"); - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - kqswnal_data.kqn_eprx_large = - ep_alloc_rcvr (kqswnal_data.kqn_ep, - EP_MSG_SVC_PORTALS_LARGE, - *kqswnal_tunables.kqn_ep_envelopes_large); - if (kqswnal_data.kqn_eprx_large == NULL) - { - CERROR ("Can't install large msg receiver\n"); - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for transmit descriptors NB we may - * either send the contents of associated buffers immediately, or - * map them for the peer to suck/blow... 
*/ - kqswnal_data.kqn_ep_tx_nmh = - ep_dvma_reserve(kqswnal_data.kqn_ep, - KQSW_NTXMSGPAGES*(*kqswnal_tunables.kqn_ntxmsgs), - EP_PERM_WRITE); - if (kqswnal_data.kqn_ep_tx_nmh == NULL) { - CERROR("Can't reserve tx dma space\n"); - kqswnal_shutdown(ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for receive buffers */ - kqswnal_data.kqn_ep_rx_nmh = - ep_dvma_reserve(kqswnal_data.kqn_ep, - KQSW_NRXMSGPAGES_SMALL * - (*kqswnal_tunables.kqn_nrxmsgs_small) + - KQSW_NRXMSGPAGES_LARGE * - (*kqswnal_tunables.kqn_nrxmsgs_large), - EP_PERM_WRITE); - if (kqswnal_data.kqn_ep_tx_nmh == NULL) { - CERROR("Can't reserve rx dma space\n"); - kqswnal_shutdown(ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Allocate/Initialise transmit descriptors */ - - kqswnal_data.kqn_txds = NULL; - for (i = 0; i < (*kqswnal_tunables.kqn_ntxmsgs); i++) - { - int premapped_pages; - int basepage = i * KQSW_NTXMSGPAGES; - - LIBCFS_ALLOC (ktx, sizeof(*ktx)); - if (ktx == NULL) { - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - memset(ktx, 0, sizeof(*ktx)); /* NULL pointers; zero flags */ - ktx->ktx_alloclist = kqswnal_data.kqn_txds; - kqswnal_data.kqn_txds = ktx; - - LIBCFS_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); - if (ktx->ktx_buffer == NULL) - { - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - /* Map pre-allocated buffer NOW, to save latency on transmit */ - premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer, - KQSW_TX_BUFFER_SIZE); - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, - kqswnal_data.kqn_ep_tx_nmh, basepage, - &all_rails, &ktx->ktx_ebuffer); - - ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */ - ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */ - - INIT_LIST_HEAD (&ktx->ktx_schedlist); - - ktx->ktx_state = KTX_IDLE; - 
ktx->ktx_rail = -1; /* unset rail */ - - list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); - } - - /**********************************************************************/ - /* Allocate/Initialise receive descriptors */ - kqswnal_data.kqn_rxds = NULL; - elan_page_idx = 0; - for (i = 0; i < *kqswnal_tunables.kqn_nrxmsgs_small + *kqswnal_tunables.kqn_nrxmsgs_large; i++) - { - EP_NMD elanbuffer; - int j; - - LIBCFS_ALLOC(krx, sizeof(*krx)); - if (krx == NULL) { - kqswnal_shutdown(ni); - return (-ENOMEM); - } - - memset(krx, 0, sizeof(*krx)); /* clear flags, null pointers etc */ - krx->krx_alloclist = kqswnal_data.kqn_rxds; - kqswnal_data.kqn_rxds = krx; - - if (i < *kqswnal_tunables.kqn_nrxmsgs_small) - { - krx->krx_npages = KQSW_NRXMSGPAGES_SMALL; - krx->krx_eprx = kqswnal_data.kqn_eprx_small; - } - else - { - krx->krx_npages = KQSW_NRXMSGPAGES_LARGE; - krx->krx_eprx = kqswnal_data.kqn_eprx_large; - } - - LASSERT (krx->krx_npages > 0); - for (j = 0; j < krx->krx_npages; j++) - { - struct page *page = alloc_page(GFP_KERNEL); - - if (page == NULL) { - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - krx->krx_kiov[j] = (lnet_kiov_t) {.kiov_page = page, - .kiov_offset = 0, - .kiov_len = PAGE_SIZE}; - LASSERT(page_address(page) != NULL); - - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - page_address(page), - PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh, - elan_page_idx, &all_rails, &elanbuffer); - - if (j == 0) { - krx->krx_elanbuffer = elanbuffer; - } else { - rc = ep_nmd_merge(&krx->krx_elanbuffer, - &krx->krx_elanbuffer, - &elanbuffer); - /* NB contiguous mapping */ - LASSERT(rc); - } - elan_page_idx++; - - } - } - LASSERT (elan_page_idx == - (*kqswnal_tunables.kqn_nrxmsgs_small * KQSW_NRXMSGPAGES_SMALL) + - (*kqswnal_tunables.kqn_nrxmsgs_large * KQSW_NRXMSGPAGES_LARGE)); - - /**********************************************************************/ - /* Queue receives, now that it's OK to run their completion callbacks */ - - for (krx = kqswnal_data.kqn_rxds; krx != 
NULL; krx = krx->krx_alloclist) { - /* NB this enqueue can allocate/sleep (attr == 0) */ - krx->krx_state = KRX_POSTED; - rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx, - &krx->krx_elanbuffer, 0); - if (rc != EP_SUCCESS) { - CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_shutdown (ni); - return (-EIO); - } - } - - /**********************************************************************/ - /* Spawn scheduling threads */ - for (i = 0; i < num_online_cpus(); i++) { - rc = kqswnal_thread_start (kqswnal_scheduler, NULL); - if (rc != 0) - { - CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_shutdown (ni); - return (-ESRCH); - } - } - - kqswnal_data.kqn_init = KQN_INIT_ALL; - return (0); -} - -void __exit -kqswnal_finalise (void) -{ - lnet_unregister_lnd(&the_kqswlnd); - kqswnal_tunables_fini(); -} - -static int __init -kqswnal_initialise (void) -{ - int rc = kqswnal_tunables_init(); - - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kqswlnd); - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Quadrics/Elan LND v1.01"); -MODULE_LICENSE("GPL"); - -module_init (kqswnal_initialise); -module_exit (kqswnal_finalise); diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h deleted file mode 100644 index 7016bf940119060f1e7ae7b4dd52340f91de312d..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd.h +++ /dev/null @@ -1,351 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef _QSWNAL_H -#define _QSWNAL_H -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <qsnet/kernel.h> -#undef printf /* nasty QSW #define */ -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> - -#include <elan/epcomms.h> - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/locks.h> /* wait_on_buffer */ -#else -#include <linux/buffer_head.h> /* wait_on_buffer */ -#endif -#include <linux/unistd.h> -#include <net/sock.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/sysctl.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -/* fixed constants */ -#define KQSW_SMALLMSG (4<<10) /* small/large ep receiver breakpoint */ -#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ - -#define KQSW_CKSUM 0 /* enable checksumming (protocol incompatible) */ - -/* - * derived constants - */ - -#define KQSW_TX_BUFFER_SIZE (offsetof(kqswnal_msg_t, \ - kqm_u.immediate.kqim_payload[*kqswnal_tunables.kqn_tx_maxcontig])) -/* The pre-allocated tx buffer (hdr + small payload) */ - -#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(LNET_MAX_PAYLOAD) + 1) -/* Reserve elan 
address space for pre-allocated and pre-mapped transmit - * buffer and a full payload too. Extra pages allow for page alignment */ - -#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_SMALLMSG)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) - -#define KQSW_NRXMSGPAGES_LARGE (btopr(sizeof(lnet_msg_t) + LNET_MAX_PAYLOAD)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) -/* biggest complete packet we can receive (or transmit) */ - -/* Wire messages */ -/* Remote memory descriptor */ -typedef struct -{ - __u32 kqrmd_nfrag; /* # frags */ - EP_NMD kqrmd_frag[0]; /* actual frags */ -} kqswnal_remotemd_t; - -/* Immediate data */ -typedef struct -{ - lnet_hdr_t kqim_hdr; /* LNET header */ - char kqim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kqswnal_immediate_msg_t; - -/* RDMA request */ -typedef struct -{ - lnet_hdr_t kqrm_hdr; /* LNET header */ - kqswnal_remotemd_t kqrm_rmd; /* peer's buffer */ -} WIRE_ATTR kqswnal_rdma_msg_t; - -typedef struct -{ - __u32 kqm_magic; /* I'm a qswlnd message */ - __u16 kqm_version; /* this is my version number */ - __u16 kqm_type; /* msg type */ -#if KQSW_CKSUM - __u32 kqm_cksum; /* crc32 checksum */ - __u32 kqm_nob; /* original msg length */ -#endif - union { - kqswnal_immediate_msg_t immediate; - kqswnal_rdma_msg_t rdma; - } WIRE_ATTR kqm_u; -} WIRE_ATTR kqswnal_msg_t; - -#if KQSW_CKSUM /* enable checksums ? 
*/ -# include <linux/crc32.h> -static inline __u32 kqswnal_csum(__u32 crc, unsigned char const *p, size_t len) -{ -#if 1 - return crc32_le(crc, p, len); -#else - while (len-- > 0) - crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ; - return crc; -#endif -} -# define QSWLND_PROTO_VERSION 0xbeef -#else -# define QSWLND_PROTO_VERSION 1 -#endif - -#define QSWLND_MSG_IMMEDIATE 0 -#define QSWLND_MSG_RDMA 1 - -typedef union { - EP_STATUSBLK ep_statusblk; - struct { - __u32 status; - __u32 magic; - __u32 version; - union { - struct { - __u32 len; - __u32 cksum; - } WIRE_ATTR get; - } WIRE_ATTR u; - } WIRE_ATTR msg; -} kqswnal_rpc_reply_t; - -typedef struct kqswnal_rx -{ - struct list_head krx_list; /* enqueue -> thread */ - struct kqswnal_rx *krx_alloclist; /* stack in kqn_rxds */ - EP_RCVR *krx_eprx; /* port to post receives to */ - EP_RXD *krx_rxd; /* receive descriptor (for repost) */ - EP_NMD krx_elanbuffer; /* contiguous Elan buffer */ - int krx_npages; /* # pages in receive buffer */ - int krx_nob; /* Number Of Bytes received into buffer */ - int krx_rpc_reply_needed:1; /* peer waiting for EKC RPC reply */ - int krx_raw_lnet_hdr:1; /* msg is a raw lnet hdr (portals compatible) */ - int krx_state; /* what this RX is doing */ - atomic_t krx_refcount; /* how to tell when rpc is done */ -#if KQSW_CKSUM - __u32 krx_cksum; /* checksum */ -#endif - kqswnal_rpc_reply_t krx_rpc_reply; /* rpc reply status block */ - lnet_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */ -} kqswnal_rx_t; - -#define KRX_POSTED 1 /* receiving */ -#define KRX_PARSE 2 /* ready to be parsed */ -#define KRX_COMPLETING 3 /* waiting to be completed */ - - -typedef struct kqswnal_tx -{ - struct list_head ktx_list; /* enqueue idle/active */ - struct list_head ktx_schedlist; /* enqueue on scheduler */ - struct kqswnal_tx *ktx_alloclist; /* stack in kqn_txds */ - unsigned int ktx_state:7; /* What I'm doing */ - unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 
0 : 1 */ - __u32 ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ - int ktx_npages; /* pages reserved for mapping messages */ - int ktx_nmappedpages; /* # pages mapped for current message */ - int ktx_port; /* destination ep port */ - lnet_nid_t ktx_nid; /* destination node */ - void *ktx_args[3]; /* completion passthru */ - char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ - cfs_time_t ktx_launchtime; /* when (in jiffies) the transmit - * was launched */ - int ktx_status; /* completion status */ -#if KQSW_CKSUM - __u32 ktx_cksum; /* optimized GET payload checksum */ -#endif - /* debug/info fields */ - pid_t ktx_launcher; /* pid of launching process */ - - int ktx_nfrag; /* # message frags */ - int ktx_rail; /* preferred rail */ - EP_NMD ktx_ebuffer; /* elan mapping of ktx_buffer */ - EP_NMD ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */ -} kqswnal_tx_t; - -#define KTX_IDLE 0 /* on kqn_idletxds */ -#define KTX_SENDING 1 /* normal send */ -#define KTX_GETTING 2 /* sending optimised get */ -#define KTX_PUTTING 3 /* sending optimised put */ -#define KTX_RDMA_FETCH 4 /* handling optimised put */ -#define KTX_RDMA_STORE 5 /* handling optimised get */ - -typedef struct -{ - int *kqn_tx_maxcontig; /* maximum payload to defrag */ - int *kqn_ntxmsgs; /* # normal tx msgs */ - int *kqn_credits; /* # concurrent sends */ - int *kqn_peercredits; /* # concurrent sends to 1 peer */ - int *kqn_nrxmsgs_large; /* # 'large' rx msgs */ - int *kqn_ep_envelopes_large; /* # 'large' rx ep envelopes */ - int *kqn_nrxmsgs_small; /* # 'small' rx msgs */ - int *kqn_ep_envelopes_small; /* # 'small' rx ep envelopes */ - int *kqn_optimized_puts; /* optimized PUTs? */ - int *kqn_optimized_gets; /* optimized GETs? 
*/ -#if KQSW_CKSUM - int *kqn_inject_csum_error; /* # csum errors to inject */ -#endif - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *kqn_sysctl; /* sysctl interface */ -#endif -} kqswnal_tunables_t; - -typedef struct -{ - char kqn_init; /* what's been initialised */ - char kqn_shuttingdown; /* I'm trying to shut down */ - atomic_t kqn_nthreads; /* # threads running */ - lnet_ni_t *kqn_ni; /* _the_ instance of me */ - - kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */ - kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */ - - struct list_head kqn_idletxds; /* transmit descriptors free to use */ - struct list_head kqn_activetxds; /* transmit descriptors being used */ - spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ - atomic_t kqn_pending_txs; /* # transmits being prepped */ - - spinlock_t kqn_sched_lock; /* serialise packet schedulers */ - wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ - - struct list_head kqn_readyrxds; /* rxds full of data */ - struct list_head kqn_donetxds; /* completed transmits */ - struct list_head kqn_delayedtxds; /* delayed transmits */ - - EP_SYS *kqn_ep; /* elan system */ - EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */ - EP_NMH *kqn_ep_rx_nmh; /* elan reserved rx vaddrs */ - EP_XMTR *kqn_eptx; /* elan transmitter */ - EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ - EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */ - - int kqn_nnodes; /* this cluster's size */ - int kqn_elanid; /* this nodes's elan ID */ - - EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */ - EP_STATUSBLK kqn_rpc_failed; - EP_STATUSBLK kqn_rpc_version; /* reply to future version query */ - EP_STATUSBLK kqn_rpc_magic; /* reply to future version query */ -} kqswnal_data_t; - -/* kqn_init state */ -#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ -#define KQN_INIT_DATA 1 -#define KQN_INIT_ALL 2 - 
-extern kqswnal_tunables_t kqswnal_tunables; -extern kqswnal_data_t kqswnal_data; - -extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg); -extern void kqswnal_rxhandler(EP_RXD *rxd); -extern int kqswnal_scheduler (void *); -extern void kqswnal_rx_done (kqswnal_rx_t *krx); - -static inline lnet_nid_t -kqswnal_elanid2nid (int elanid) -{ - return LNET_MKNID(LNET_NIDNET(kqswnal_data.kqn_ni->ni_nid), elanid); -} - -static inline int -kqswnal_nid2elanid (lnet_nid_t nid) -{ - __u32 elanid = LNET_NIDADDR(nid); - - /* not in this cluster? */ - return (elanid >= kqswnal_data.kqn_nnodes) ? -1 : elanid; -} - -static inline lnet_nid_t -kqswnal_rx_nid(kqswnal_rx_t *krx) -{ - return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd))); -} - -static inline int -kqswnal_pages_spanned (void *base, int nob) -{ - unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT; - unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT; - - LASSERT (last_page >= first_page); /* can't wrap address space */ - return (last_page - first_page + 1); -} - -static inline void kqswnal_rx_decref (kqswnal_rx_t *krx) -{ - LASSERT (atomic_read (&krx->krx_refcount) > 0); - if (atomic_dec_and_test (&krx->krx_refcount)) - kqswnal_rx_done(krx); -} - -int kqswnal_startup (lnet_ni_t *ni); -void kqswnal_shutdown (lnet_ni_t *ni); -int kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg); -int kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kqswnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); - -int kqswnal_tunables_init(void); -void kqswnal_tunables_fini(void); - -#endif /* _QSWNAL_H */ diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c deleted file mode 100644 index c509d10d603f17acec339361e8b33cb86a631f02..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ 
/dev/null @@ -1,1832 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswlnd.h" - -void -kqswnal_notify_peer_down(kqswnal_tx_t *ktx) -{ - time_t then; - - then = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - ktx->ktx_launchtime); - - lnet_notify(kqswnal_data.kqn_ni, ktx->ktx_nid, 0, then); -} - -void -kqswnal_unmap_tx (kqswnal_tx_t *ktx) -{ - int i; - - ktx->ktx_rail = -1; /* unset rail */ - - if (ktx->ktx_nmappedpages == 0) - return; - - CDEBUG(D_NET, "%p unloading %d frags starting at %d\n", - ktx, ktx->ktx_nfrag, ktx->ktx_firsttmpfrag); - - for (i = ktx->ktx_firsttmpfrag; i < ktx->ktx_nfrag; i++) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_tx_nmh, - &ktx->ktx_frags[i]); - - ktx->ktx_nmappedpages = 0; -} - -int -kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, - unsigned int niov, lnet_kiov_t *kiov) -{ - int nfrags = ktx->ktx_nfrag; - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - __u32 basepage = ktx->ktx_basepage + nmapped; - char *ptr; - - EP_RAILMASK railmask; - int rail; - - if (ktx->ktx_rail < 0) - ktx->ktx_rail = 
ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - rail = ktx->ktx_rail; - if (rail < 0) { - CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid)); - return (-ENETDOWN); - } - railmask = 1 << rail; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags >= ktx->ktx_firsttmpfrag); - LASSERT (nfrags <= EP_MAXFRAG); - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before 'offset' */ - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - kiov++; - niov--; - LASSERT (niov > 0); - } - - do { - int fraglen = kiov->kiov_len - offset; - - /* each page frag is contained in one page */ - LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE); - - if (fraglen > nob) - fraglen = nob; - - nmapped++; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == EP_MAXFRAG) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - EP_MAXFRAG); - return (-EMSGSIZE); - } - - /* XXX this is really crap, but we'll have to kmap until - * EKC has a page (rather than vaddr) mapping interface */ - - ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, page %d, %d total\n", - ktx, nfrags, ptr, fraglen, basepage, nmapped); - - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - ptr, fraglen, - kqswnal_data.kqn_ep_tx_nmh, basepage, - &railmask, &ktx->ktx_frags[nfrags]); - - if (nfrags == ktx->ktx_firsttmpfrag || - !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags])) { - /* new frag if this is the first or can't merge */ - nfrags++; - } - - kunmap (kiov->kiov_page); - - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - basepage++; - kiov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob 
> 0); - - ktx->ktx_nfrag = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages); - - return (0); -} - -#if KQSW_CKSUM -__u32 -kqswnal_csum_kiov (__u32 csum, int offset, int nob, - unsigned int niov, lnet_kiov_t *kiov) -{ - char *ptr; - - if (nob == 0) - return csum; - - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before 'offset' */ - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - kiov++; - niov--; - LASSERT (niov > 0); - } - - do { - int fraglen = kiov->kiov_len - offset; - - /* each page frag is contained in one page */ - LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE); - - if (fraglen > nob) - fraglen = nob; - - ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; - - csum = kqswnal_csum(csum, ptr, fraglen); - - kunmap (kiov->kiov_page); - - kiov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - return csum; -} -#endif - -int -kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob, - unsigned int niov, struct iovec *iov) -{ - int nfrags = ktx->ktx_nfrag; - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - __u32 basepage = ktx->ktx_basepage + nmapped; - - EP_RAILMASK railmask; - int rail; - - if (ktx->ktx_rail < 0) - ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - rail = ktx->ktx_rail; - if (rail < 0) { - CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid)); - return (-ENETDOWN); - } - railmask = 1 << rail; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags >= ktx->ktx_firsttmpfrag); - LASSERT (nfrags <= EP_MAXFRAG); - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before offset */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - do { - int fraglen = 
iov->iov_len - offset; - long npages; - - if (fraglen > nob) - fraglen = nob; - npages = kqswnal_pages_spanned (iov->iov_base, fraglen); - - nmapped += npages; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == EP_MAXFRAG) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - EP_MAXFRAG); - return (-EMSGSIZE); - } - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, pages %d for %ld, %d total\n", - ktx, nfrags, iov->iov_base + offset, fraglen, - basepage, npages, nmapped); - - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - iov->iov_base + offset, fraglen, - kqswnal_data.kqn_ep_tx_nmh, basepage, - &railmask, &ktx->ktx_frags[nfrags]); - - if (nfrags == ktx->ktx_firsttmpfrag || - !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags])) { - /* new frag if this is the first or can't merge */ - nfrags++; - } - - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - basepage += npages; - iov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - ktx->ktx_nfrag = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages); - - return (0); -} - -#if KQSW_CKSUM -__u32 -kqswnal_csum_iov (__u32 csum, int offset, int nob, - unsigned int niov, struct iovec *iov) -{ - if (nob == 0) - return csum; - - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before offset */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - do { - int fraglen = iov->iov_len - offset; - - if (fraglen > nob) - fraglen = nob; - - csum = kqswnal_csum(csum, iov->iov_base + offset, fraglen); - - iov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - 
- } while (nob > 0); - - return csum; -} -#endif - -void -kqswnal_put_idle_tx (kqswnal_tx_t *ktx) -{ - unsigned long flags; - - kqswnal_unmap_tx (ktx); /* release temporary mappings */ - ktx->ktx_state = KTX_IDLE; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - list_del (&ktx->ktx_list); /* take off active list */ - list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); -} - -kqswnal_tx_t * -kqswnal_get_idle_tx (void) -{ - unsigned long flags; - kqswnal_tx_t *ktx; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - if (kqswnal_data.kqn_shuttingdown || - list_empty (&kqswnal_data.kqn_idletxds)) { - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - return NULL; - } - - ktx = list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - - list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds); - ktx->ktx_launcher = current->pid; - atomic_inc(&kqswnal_data.kqn_pending_txs); - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */ - LASSERT (ktx->ktx_nmappedpages == 0); - return (ktx); -} - -void -kqswnal_tx_done_in_thread_context (kqswnal_tx_t *ktx) -{ - lnet_msg_t *lnetmsg0 = NULL; - lnet_msg_t *lnetmsg1 = NULL; - int status0 = 0; - int status1 = 0; - kqswnal_rx_t *krx; - - LASSERT (!in_interrupt()); - - if (ktx->ktx_status == -EHOSTDOWN) - kqswnal_notify_peer_down(ktx); - - switch (ktx->ktx_state) { - case KTX_RDMA_FETCH: /* optimized PUT/REPLY handled */ - krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - status0 = ktx->ktx_status; -#if KQSW_CKSUM - if (status0 == 0) { /* RDMA succeeded */ - kqswnal_msg_t *msg; - __u32 csum; - - msg = (kqswnal_msg_t *) - page_address(krx->krx_kiov[0].kiov_page); - - csum = (lnetmsg0->msg_kiov != NULL) ? 
- kqswnal_csum_kiov(krx->krx_cksum, - lnetmsg0->msg_offset, - lnetmsg0->msg_wanted, - lnetmsg0->msg_niov, - lnetmsg0->msg_kiov) : - kqswnal_csum_iov(krx->krx_cksum, - lnetmsg0->msg_offset, - lnetmsg0->msg_wanted, - lnetmsg0->msg_niov, - lnetmsg0->msg_iov); - - /* Can only check csum if I got it all */ - if (lnetmsg0->msg_wanted == lnetmsg0->msg_len && - csum != msg->kqm_cksum) { - ktx->ktx_status = -EIO; - krx->krx_rpc_reply.msg.status = -EIO; - CERROR("RDMA checksum failed %u(%u) from %s\n", - csum, msg->kqm_cksum, - libcfs_nid2str(kqswnal_rx_nid(krx))); - } - } -#endif - LASSERT (krx->krx_state == KRX_COMPLETING); - kqswnal_rx_decref (krx); - break; - - case KTX_RDMA_STORE: /* optimized GET handled */ - case KTX_PUTTING: /* optimized PUT sent */ - case KTX_SENDING: /* normal send */ - lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - status0 = ktx->ktx_status; - break; - - case KTX_GETTING: /* optimized GET sent & payload received */ - /* Complete the GET with success since we can't avoid - * delivering a REPLY event; we committed to it when we - * launched the GET */ - lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - status0 = 0; - lnetmsg1 = (lnet_msg_t *)ktx->ktx_args[2]; - status1 = ktx->ktx_status; -#if KQSW_CKSUM - if (status1 == 0) { /* RDMA succeeded */ - lnet_msg_t *lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - lnet_libmd_t *md = lnetmsg0->msg_md; - __u32 csum; - - csum = ((md->md_options & LNET_MD_KIOV) != 0) ? 
- kqswnal_csum_kiov(~0, 0, - md->md_length, - md->md_niov, - md->md_iov.kiov) : - kqswnal_csum_iov(~0, 0, - md->md_length, - md->md_niov, - md->md_iov.iov); - - if (csum != ktx->ktx_cksum) { - CERROR("RDMA checksum failed %u(%u) from %s\n", - csum, ktx->ktx_cksum, - libcfs_nid2str(ktx->ktx_nid)); - status1 = -EIO; - } - } -#endif - break; - - default: - LASSERT (0); - } - - kqswnal_put_idle_tx (ktx); - - lnet_finalize (kqswnal_data.kqn_ni, lnetmsg0, status0); - if (lnetmsg1 != NULL) - lnet_finalize (kqswnal_data.kqn_ni, lnetmsg1, status1); -} - -void -kqswnal_tx_done (kqswnal_tx_t *ktx, int status) -{ - unsigned long flags; - - ktx->ktx_status = status; - - if (!in_interrupt()) { - kqswnal_tx_done_in_thread_context(ktx); - return; - } - - /* Complete the send in thread context */ - spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail(&ktx->ktx_schedlist, - &kqswnal_data.kqn_donetxds); - wake_up(&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags); -} - -static void -kqswnal_txhandler(EP_TXD *txd, void *arg, int status) -{ - kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg; - kqswnal_rpc_reply_t *reply; - - LASSERT (txd != NULL); - LASSERT (ktx != NULL); - - CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status); - - if (status != EP_SUCCESS) { - - CDEBUG (D_NETERROR, "Tx completion to %s failed: %d\n", - libcfs_nid2str(ktx->ktx_nid), status); - - status = -EHOSTDOWN; - - } else switch (ktx->ktx_state) { - - case KTX_GETTING: - case KTX_PUTTING: - /* RPC complete! 
*/ - reply = (kqswnal_rpc_reply_t *)ep_txd_statusblk(txd); - if (reply->msg.magic == 0) { /* "old" peer */ - status = reply->msg.status; - break; - } - - if (reply->msg.magic != LNET_PROTO_QSW_MAGIC) { - if (reply->msg.magic != swab32(LNET_PROTO_QSW_MAGIC)) { - CERROR("%s unexpected rpc reply magic %08x\n", - libcfs_nid2str(ktx->ktx_nid), - reply->msg.magic); - status = -EPROTO; - break; - } - - __swab32s(&reply->msg.status); - __swab32s(&reply->msg.version); - - if (ktx->ktx_state == KTX_GETTING) { - __swab32s(&reply->msg.u.get.len); - __swab32s(&reply->msg.u.get.cksum); - } - } - - status = reply->msg.status; - if (status != 0) { - CERROR("%s RPC status %08x\n", - libcfs_nid2str(ktx->ktx_nid), status); - break; - } - - if (ktx->ktx_state == KTX_GETTING) { - lnet_set_reply_msg_len(kqswnal_data.kqn_ni, - (lnet_msg_t *)ktx->ktx_args[2], - reply->msg.u.get.len); -#if KQSW_CKSUM - ktx->ktx_cksum = reply->msg.u.get.cksum; -#endif - } - break; - - case KTX_SENDING: - status = 0; - break; - - default: - LBUG(); - break; - } - - kqswnal_tx_done(ktx, status); -} - -int -kqswnal_launch (kqswnal_tx_t *ktx) -{ - /* Don't block for transmit descriptor if we're in interrupt context */ - int attr = in_interrupt() ? 
(EP_NO_SLEEP | EP_NO_ALLOC) : 0; - int dest = kqswnal_nid2elanid (ktx->ktx_nid); - unsigned long flags; - int rc; - - ktx->ktx_launchtime = cfs_time_current(); - - if (kqswnal_data.kqn_shuttingdown) - return (-ESHUTDOWN); - - LASSERT (dest >= 0); /* must be a peer */ - - if (ktx->ktx_nmappedpages != 0) - attr = EP_SET_PREFRAIL(attr, ktx->ktx_rail); - - switch (ktx->ktx_state) { - case KTX_GETTING: - case KTX_PUTTING: - if (the_lnet.ln_testprotocompat != 0 && - the_lnet.ln_ptlcompat == 0) { - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - - /* single-shot proto test: - * Future version queries will use an RPC, so I'll - * co-opt one of the existing ones */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - msg->kqm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - msg->kqm_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t. 
- * The other frags are the payload, awaiting RDMA */ - rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest, - ktx->ktx_port, attr, - kqswnal_txhandler, ktx, - NULL, ktx->ktx_frags, 1); - break; - - case KTX_SENDING: - rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest, - ktx->ktx_port, attr, - kqswnal_txhandler, ktx, - NULL, ktx->ktx_frags, ktx->ktx_nfrag); - break; - - default: - LBUG(); - rc = -EINVAL; /* no compiler warning please */ - break; - } - - switch (rc) { - case EP_SUCCESS: /* success */ - return (0); - - case EP_ENOMEM: /* can't allocate ep txd => queue for later */ - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&ktx->ktx_schedlist, &kqswnal_data.kqn_delayedtxds); - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - return (0); - - default: /* fatal error */ - CDEBUG (D_NETERROR, "Tx to %s failed: %d\n", libcfs_nid2str(ktx->ktx_nid), rc); - kqswnal_notify_peer_down(ktx); - return (-EHOSTUNREACH); - } -} - -#if 0 -static char * -hdr_type_string (lnet_hdr_t *hdr) -{ - switch (hdr->type) { - case LNET_MSG_ACK: - return ("ACK"); - case LNET_MSG_PUT: - return ("PUT"); - case LNET_MSG_GET: - return ("GET"); - case LNET_MSG_REPLY: - return ("REPLY"); - default: - return ("<UNKNOWN>"); - } -} - -static void -kqswnal_cerror_hdr(lnet_hdr_t * hdr) -{ - char *type_str = hdr_type_string (hdr); - - CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str, - le32_to_cpu(hdr->payload_length)); - CERROR(" From nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->src_nid), - le32_to_cpu(hdr->src_pid)); - CERROR(" To nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->dest_nid), - le32_to_cpu(hdr->dest_pid)); - - switch (le32_to_cpu(hdr->type)) { - case LNET_MSG_PUT: - CERROR(" Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPX64"\n", - le32_to_cpu(hdr->msg.put.ptl_index), - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - 
le64_to_cpu(hdr->msg.put.match_bits)); - CERROR(" offset %d, hdr data "LPX64"\n", - le32_to_cpu(hdr->msg.put.offset), - hdr->msg.put.hdr_data); - break; - - case LNET_MSG_GET: - CERROR(" Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPX64"\n", - le32_to_cpu(hdr->msg.get.ptl_index), - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CERROR(" Length %d, src offset %d\n", - le32_to_cpu(hdr->msg.get.sink_length), - le32_to_cpu(hdr->msg.get.src_offset)); - break; - - case LNET_MSG_ACK: - CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - le32_to_cpu(hdr->msg.ack.mlength)); - break; - - case LNET_MSG_REPLY: - CERROR(" dst md "LPX64"."LPX64"\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - } - -} /* end of print_hdr() */ -#endif - -int -kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag, - int nrfrag, EP_NMD *rfrag) -{ - int i; - - if (nlfrag != nrfrag) { - CERROR("Can't cope with unequal # frags: %d local %d remote\n", - nlfrag, nrfrag); - return (-EINVAL); - } - - for (i = 0; i < nlfrag; i++) - if (lfrag[i].nmd_len != rfrag[i].nmd_len) { - CERROR("Can't cope with unequal frags %d(%d):" - " %d local %d remote\n", - i, nlfrag, lfrag[i].nmd_len, rfrag[i].nmd_len); - return (-EINVAL); - } - - return (0); -} - -kqswnal_remotemd_t * -kqswnal_get_portalscompat_rmd (kqswnal_rx_t *krx) -{ - /* Check that the RMD sent after the "raw" LNET header in a - * portals-compatible QSWLND message is OK */ - char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page); - kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + sizeof(lnet_hdr_t)); - - /* Note RDMA addresses are sent in native endian-ness in the "old" - * portals protocol so no swabbing... 
*/ - - if (buffer + krx->krx_nob < (char *)(rmd + 1)) { - /* msg too small to discover rmd size */ - CERROR ("Incoming message [%d] too small for RMD (%d needed)\n", - krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer)); - return (NULL); - } - - if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) { - /* rmd doesn't fit in the incoming message */ - CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n", - krx->krx_nob, rmd->kqrmd_nfrag, - (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer)); - return (NULL); - } - - return (rmd); -} - -void -kqswnal_rdma_store_complete (EP_RXD *rxd) -{ - int status = ep_rxd_status(rxd); - kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - - CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR, - "rxd %p, ktx %p, status %d\n", rxd, ktx, status); - - LASSERT (ktx->ktx_state == KTX_RDMA_STORE); - LASSERT (krx->krx_rxd == rxd); - LASSERT (krx->krx_rpc_reply_needed); - - krx->krx_rpc_reply_needed = 0; - kqswnal_rx_decref (krx); - - /* free ktx & finalize() its lnet_msg_t */ - kqswnal_tx_done(ktx, (status == EP_SUCCESS) ? 0 : -ECONNABORTED); -} - -void -kqswnal_rdma_fetch_complete (EP_RXD *rxd) -{ - /* Completed fetching the PUT/REPLY data */ - int status = ep_rxd_status(rxd); - kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - - CDEBUG((status == EP_SUCCESS) ? 
D_NET : D_ERROR, - "rxd %p, ktx %p, status %d\n", rxd, ktx, status); - - LASSERT (ktx->ktx_state == KTX_RDMA_FETCH); - LASSERT (krx->krx_rxd == rxd); - /* RPC completes with failure by default */ - LASSERT (krx->krx_rpc_reply_needed); - LASSERT (krx->krx_rpc_reply.msg.status != 0); - - if (status == EP_SUCCESS) { - krx->krx_rpc_reply.msg.status = 0; - status = 0; - } else { - /* Abandon RPC since get failed */ - krx->krx_rpc_reply_needed = 0; - status = -ECONNABORTED; - } - - /* krx gets decref'd in kqswnal_tx_done_in_thread_context() */ - LASSERT (krx->krx_state == KRX_PARSE); - krx->krx_state = KRX_COMPLETING; - - /* free ktx & finalize() its lnet_msg_t */ - kqswnal_tx_done(ktx, status); -} - -int -kqswnal_rdma (kqswnal_rx_t *krx, lnet_msg_t *lntmsg, - int type, kqswnal_remotemd_t *rmd, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int len) -{ - kqswnal_tx_t *ktx; - int eprc; - int rc; - - /* Not both mapped and paged payload */ - LASSERT (iov == NULL || kiov == NULL); - /* RPC completes with failure by default */ - LASSERT (krx->krx_rpc_reply_needed); - LASSERT (krx->krx_rpc_reply.msg.status != 0); - - if (len == 0) { - /* data got truncated to nothing. 
*/ - lnet_finalize(kqswnal_data.kqn_ni, lntmsg, 0); - /* Let kqswnal_rx_done() complete the RPC with success */ - krx->krx_rpc_reply.msg.status = 0; - return (0); - } - - /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not - actually sending a portals message with it */ - ktx = kqswnal_get_idle_tx(); - if (ktx == NULL) { - CERROR ("Can't get txd for RDMA with %s\n", - libcfs_nid2str(kqswnal_rx_nid(krx))); - return (-ENOMEM); - } - - ktx->ktx_state = type; - ktx->ktx_nid = kqswnal_rx_nid(krx); - ktx->ktx_args[0] = krx; - ktx->ktx_args[1] = lntmsg; - - LASSERT (atomic_read(&krx->krx_refcount) > 0); - /* Take an extra ref for the completion callback */ - atomic_inc(&krx->krx_refcount); - - /* Map on the rail the RPC prefers */ - ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx, - ep_rxd_railmask(krx->krx_rxd)); - - /* Start mapping at offset 0 (we're not mapping any headers) */ - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0; - - if (kiov != NULL) - rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov); - else - rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov); - - if (rc != 0) { - CERROR ("Can't map local RDMA data: %d\n", rc); - goto out; - } - - rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags, - rmd->kqrmd_nfrag, rmd->kqrmd_frag); - if (rc != 0) { - CERROR ("Incompatible RDMA descriptors\n"); - goto out; - } - - switch (type) { - default: - LBUG(); - - case KTX_RDMA_STORE: - krx->krx_rpc_reply.msg.status = 0; - krx->krx_rpc_reply.msg.magic = LNET_PROTO_QSW_MAGIC; - krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION; - krx->krx_rpc_reply.msg.u.get.len = len; -#if KQSW_CKSUM - krx->krx_rpc_reply.msg.u.get.cksum = (kiov != NULL) ? 
- kqswnal_csum_kiov(~0, offset, len, niov, kiov) : - kqswnal_csum_iov(~0, offset, len, niov, iov); - if (*kqswnal_tunables.kqn_inject_csum_error == 4) { - krx->krx_rpc_reply.msg.u.get.cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - eprc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rdma_store_complete, ktx, - &krx->krx_rpc_reply.ep_statusblk, - ktx->ktx_frags, rmd->kqrmd_frag, - rmd->kqrmd_nfrag); - if (eprc != EP_SUCCESS) { - CERROR("can't complete RPC: %d\n", eprc); - /* don't re-attempt RPC completion */ - krx->krx_rpc_reply_needed = 0; - rc = -ECONNABORTED; - } - break; - - case KTX_RDMA_FETCH: - eprc = ep_rpc_get (krx->krx_rxd, - kqswnal_rdma_fetch_complete, ktx, - rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag); - if (eprc != EP_SUCCESS) { - CERROR("ep_rpc_get failed: %d\n", eprc); - /* Don't attempt RPC completion: - * EKC nuked it when the get failed */ - krx->krx_rpc_reply_needed = 0; - rc = -ECONNABORTED; - } - break; - } - - out: - if (rc != 0) { - kqswnal_rx_decref(krx); /* drop callback's ref */ - kqswnal_put_idle_tx (ktx); - } - - atomic_dec(&kqswnal_data.kqn_pending_txs); - return (rc); -} - -int -kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - int nob; - kqswnal_tx_t *ktx; - int rc; - - /* NB 1. hdr is in network byte order */ - /* 2. 
'private' depends on the message type */ - - CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* It must be OK to kmap() if required */ - LASSERT (payload_kiov == NULL || !in_interrupt ()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - if (kqswnal_nid2elanid (target.nid) < 0) { - CERROR("%s not in my cluster\n", libcfs_nid2str(target.nid)); - return -EIO; - } - - /* I may not block for a transmit descriptor if I might block the - * router, receiver, or an interrupt handler. */ - ktx = kqswnal_get_idle_tx(); - if (ktx == NULL) { - CERROR ("Can't get txd for msg type %d for %s\n", - type, libcfs_nid2str(target.nid)); - return (-ENOMEM); - } - - ktx->ktx_state = KTX_SENDING; - ktx->ktx_nid = target.nid; - ktx->ktx_args[0] = private; - ktx->ktx_args[1] = lntmsg; - ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */ - - /* The first frag will be the pre-mapped buffer. */ - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1; - - if ((!target_is_router && /* target.nid is final dest */ - !routing && /* I'm the source */ - type == LNET_MSG_GET && /* optimize GET? */ - *kqswnal_tunables.kqn_optimized_gets != 0 && - lntmsg->msg_md->md_length >= - *kqswnal_tunables.kqn_optimized_gets) || - ((type == LNET_MSG_PUT || /* optimize PUT? */ - type == LNET_MSG_REPLY) && /* optimize REPLY? */ - *kqswnal_tunables.kqn_optimized_puts != 0 && - payload_nob >= *kqswnal_tunables.kqn_optimized_puts)) { - lnet_libmd_t *md = lntmsg->msg_md; - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - lnet_hdr_t *mhdr; - kqswnal_remotemd_t *rmd; - - /* Optimised path: I send over the Elan vaddrs of the local - * buffers, and my peer DMAs directly to/from them. - * - * First I set up ktx as if it was going to send this - * payload, (it needs to map it anyway). 
This fills - * ktx_frags[1] and onward with the network addresses - * of the buffer frags. */ - - if (the_lnet.ln_ptlcompat == 2) { - /* Strong portals compatibility: send "raw" LNET - * header + rdma descriptor */ - mhdr = (lnet_hdr_t *)ktx->ktx_buffer; - rmd = (kqswnal_remotemd_t *)(mhdr + 1); - } else { - /* Send an RDMA message */ - msg->kqm_magic = LNET_PROTO_QSW_MAGIC; - msg->kqm_version = QSWLND_PROTO_VERSION; - msg->kqm_type = QSWLND_MSG_RDMA; - - mhdr = &msg->kqm_u.rdma.kqrm_hdr; - rmd = &msg->kqm_u.rdma.kqrm_rmd; - } - - *mhdr = *hdr; - nob = (((char *)rmd) - ktx->ktx_buffer); - - if (type == LNET_MSG_GET) { - if ((md->md_options & LNET_MD_KIOV) != 0) - rc = kqswnal_map_tx_kiov (ktx, 0, md->md_length, - md->md_niov, md->md_iov.kiov); - else - rc = kqswnal_map_tx_iov (ktx, 0, md->md_length, - md->md_niov, md->md_iov.iov); - ktx->ktx_state = KTX_GETTING; - } else { - if (payload_kiov != NULL) - rc = kqswnal_map_tx_kiov(ktx, 0, payload_nob, - payload_niov, payload_kiov); - else - rc = kqswnal_map_tx_iov(ktx, 0, payload_nob, - payload_niov, payload_iov); - ktx->ktx_state = KTX_PUTTING; - } - - if (rc != 0) - goto out; - - rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1; - nob += offsetof(kqswnal_remotemd_t, - kqrmd_frag[rmd->kqrmd_nfrag]); - LASSERT (nob <= KQSW_TX_BUFFER_SIZE); - - memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1], - rmd->kqrmd_nfrag * sizeof(EP_NMD)); - - ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob); -#if KQSW_CKSUM - LASSERT (the_lnet.ln_ptlcompat != 2); - msg->kqm_nob = nob + payload_nob; - msg->kqm_cksum = 0; - msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob); -#endif - if (type == LNET_MSG_GET) { - /* Allocate reply message now while I'm in thread context */ - ktx->ktx_args[2] = lnet_create_reply_msg ( - kqswnal_data.kqn_ni, lntmsg); - if (ktx->ktx_args[2] == NULL) - goto out; - - /* NB finalizing the REPLY message is my - * responsibility now, whatever happens. 
*/ -#if KQSW_CKSUM - if (*kqswnal_tunables.kqn_inject_csum_error == 3) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } - - } else if (payload_kiov != NULL) { - /* must checksum payload after header so receiver can - * compute partial header cksum before swab. Sadly - * this causes 2 rounds of kmap */ - msg->kqm_cksum = - kqswnal_csum_kiov(msg->kqm_cksum, 0, payload_nob, - payload_niov, payload_kiov); - if (*kqswnal_tunables.kqn_inject_csum_error == 2) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } - } else { - msg->kqm_cksum = - kqswnal_csum_iov(msg->kqm_cksum, 0, payload_nob, - payload_niov, payload_iov); - if (*kqswnal_tunables.kqn_inject_csum_error == 2) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - } - - } else if (payload_nob <= *kqswnal_tunables.kqn_tx_maxcontig) { - lnet_hdr_t *mhdr; - char *payload; - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - - /* small message: single frag copied into the pre-mapped buffer */ - if (the_lnet.ln_ptlcompat == 2) { - /* Strong portals compatibility: send "raw" LNET header - * + payload */ - mhdr = (lnet_hdr_t *)ktx->ktx_buffer; - payload = (char *)(mhdr + 1); - } else { - /* Send an IMMEDIATE message */ - msg->kqm_magic = LNET_PROTO_QSW_MAGIC; - msg->kqm_version = QSWLND_PROTO_VERSION; - msg->kqm_type = QSWLND_MSG_IMMEDIATE; - - mhdr = &msg->kqm_u.immediate.kqim_hdr; - payload = msg->kqm_u.immediate.kqim_payload; - } - - *mhdr = *hdr; - nob = (payload - ktx->ktx_buffer) + payload_nob; - - ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob); - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(KQSW_TX_BUFFER_SIZE, payload, 0, - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(KQSW_TX_BUFFER_SIZE, payload, 0, - payload_niov, payload_iov, - payload_offset, payload_nob); -#if KQSW_CKSUM - LASSERT (the_lnet.ln_ptlcompat != 2); - msg->kqm_nob = nob; - msg->kqm_cksum = 0; - 
msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob); - if (*kqswnal_tunables.kqn_inject_csum_error == 1) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - } else { - lnet_hdr_t *mhdr; - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - - /* large message: multiple frags: first is hdr in pre-mapped buffer */ - if (the_lnet.ln_ptlcompat == 2) { - /* Strong portals compatibility: send "raw" LNET header - * + payload */ - mhdr = (lnet_hdr_t *)ktx->ktx_buffer; - nob = sizeof(lnet_hdr_t); - } else { - /* Send an IMMEDIATE message */ - msg->kqm_magic = LNET_PROTO_QSW_MAGIC; - msg->kqm_version = QSWLND_PROTO_VERSION; - msg->kqm_type = QSWLND_MSG_IMMEDIATE; - - mhdr = &msg->kqm_u.immediate.kqim_hdr; - nob = offsetof(kqswnal_msg_t, - kqm_u.immediate.kqim_payload); - } - - *mhdr = *hdr; - - ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob); - - if (payload_kiov != NULL) - rc = kqswnal_map_tx_kiov (ktx, payload_offset, payload_nob, - payload_niov, payload_kiov); - else - rc = kqswnal_map_tx_iov (ktx, payload_offset, payload_nob, - payload_niov, payload_iov); - if (rc != 0) - goto out; - -#if KQSW_CKSUM - msg->kqm_nob = nob + payload_nob; - msg->kqm_cksum = 0; - msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob); - - msg->kqm_cksum = (payload_kiov != NULL) ? - kqswnal_csum_kiov(msg->kqm_cksum, - payload_offset, payload_nob, - payload_niov, payload_kiov) : - kqswnal_csum_iov(msg->kqm_cksum, - payload_offset, payload_nob, - payload_niov, payload_iov); - - if (*kqswnal_tunables.kqn_inject_csum_error == 1) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - nob += payload_nob; - } - - ktx->ktx_port = (nob <= KQSW_SMALLMSG) ? - EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE; - - rc = kqswnal_launch (ktx); - - out: - CDEBUG(rc == 0 ? D_NET : D_NETERROR, "%s %d bytes to %s%s: rc %d\n", - routing ? (rc == 0 ? "Routed" : "Failed to route") : - (rc == 0 ? 
"Sent" : "Failed to send"), - nob, libcfs_nid2str(target.nid), - target_is_router ? "(router)" : "", rc); - - if (rc != 0) { - lnet_msg_t *repmsg = (lnet_msg_t *)ktx->ktx_args[2]; - int state = ktx->ktx_state; - - kqswnal_put_idle_tx (ktx); - - if (state == KTX_GETTING && repmsg != NULL) { - /* We committed to reply, but there was a problem - * launching the GET. We can't avoid delivering a - * REPLY event since we committed above, so we - * pretend the GET succeeded but the REPLY - * failed. */ - rc = 0; - lnet_finalize (kqswnal_data.kqn_ni, lntmsg, 0); - lnet_finalize (kqswnal_data.kqn_ni, repmsg, -EIO); - } - - } - - atomic_dec(&kqswnal_data.kqn_pending_txs); - return (rc == 0 ? 0 : -EIO); -} - -void -kqswnal_requeue_rx (kqswnal_rx_t *krx) -{ - LASSERT (atomic_read(&krx->krx_refcount) == 0); - LASSERT (!krx->krx_rpc_reply_needed); - - krx->krx_state = KRX_POSTED; - - if (kqswnal_data.kqn_shuttingdown) { - /* free EKC rxd on shutdown */ - ep_complete_receive(krx->krx_rxd); - } else { - /* repost receive */ - ep_requeue_receive(krx->krx_rxd, - kqswnal_rxhandler, krx, - &krx->krx_elanbuffer, 0); - } -} - -void -kqswnal_rpc_complete (EP_RXD *rxd) -{ - int status = ep_rxd_status(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg(rxd); - - CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR, - "rxd %p, krx %p, status %d\n", rxd, krx, status); - - LASSERT (krx->krx_rxd == rxd); - LASSERT (krx->krx_rpc_reply_needed); - - krx->krx_rpc_reply_needed = 0; - kqswnal_requeue_rx (krx); -} - -void -kqswnal_rx_done (kqswnal_rx_t *krx) -{ - int rc; - - LASSERT (atomic_read(&krx->krx_refcount) == 0); - - if (krx->krx_rpc_reply_needed) { - /* We've not completed the peer's RPC yet... 
*/ - krx->krx_rpc_reply.msg.magic = LNET_PROTO_QSW_MAGIC; - krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION; - - LASSERT (!in_interrupt()); - - rc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rpc_complete, krx, - &krx->krx_rpc_reply.ep_statusblk, - NULL, NULL, 0); - if (rc == EP_SUCCESS) - return; - - CERROR("can't complete RPC: %d\n", rc); - krx->krx_rpc_reply_needed = 0; - } - - kqswnal_requeue_rx(krx); -} - -void -kqswnal_parse (kqswnal_rx_t *krx) -{ - lnet_ni_t *ni = kqswnal_data.kqn_ni; - kqswnal_msg_t *msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page); - lnet_nid_t fromnid = kqswnal_rx_nid(krx); - int swab; - int n; - int i; - int nob; - int rc; - - LASSERT (atomic_read(&krx->krx_refcount) == 1); - - /* If ln_ptlcompat is set, peers may send me an "old" unencapsulated - * lnet hdr */ - LASSERT (offsetof(kqswnal_msg_t, kqm_u) <= sizeof(lnet_hdr_t)); - - if (krx->krx_nob < offsetof(kqswnal_msg_t, kqm_u)) { - CERROR("Short message %d received from %s\n", - krx->krx_nob, libcfs_nid2str(fromnid)); - goto done; - } - - swab = msg->kqm_magic == __swab32(LNET_PROTO_QSW_MAGIC); - - if (swab || msg->kqm_magic == LNET_PROTO_QSW_MAGIC) { -#if KQSW_CKSUM - __u32 csum0; - __u32 csum1; - - /* csum byte array before swab */ - csum1 = msg->kqm_cksum; - msg->kqm_cksum = 0; - csum0 = kqswnal_csum_kiov(~0, 0, krx->krx_nob, - krx->krx_npages, krx->krx_kiov); - msg->kqm_cksum = csum1; -#endif - - if (swab) { - __swab16s(&msg->kqm_version); - __swab16s(&msg->kqm_type); -#if KQSW_CKSUM - __swab32s(&msg->kqm_cksum); - __swab32s(&msg->kqm_nob); -#endif - } - - if (msg->kqm_version != QSWLND_PROTO_VERSION) { - /* Future protocol version compatibility support! - * The next qswlnd-specific protocol rev will first - * send an RPC to check version. - * 1.4.6 and 1.4.7.early reply with a status - * block containing its current version. 
- * Later versions send a failure (-ve) status + - * magic/version */ - - if (!krx->krx_rpc_reply_needed) { - CERROR("Unexpected version %d from %s\n", - msg->kqm_version, libcfs_nid2str(fromnid)); - goto done; - } - - LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO); - goto done; - } - - switch (msg->kqm_type) { - default: - CERROR("Bad request type %x from %s\n", - msg->kqm_type, libcfs_nid2str(fromnid)); - goto done; - - case QSWLND_MSG_IMMEDIATE: - if (krx->krx_rpc_reply_needed) { - /* Should have been a simple message */ - CERROR("IMMEDIATE sent as RPC from %s\n", - libcfs_nid2str(fromnid)); - goto done; - } - - nob = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload); - if (krx->krx_nob < nob) { - CERROR("Short IMMEDIATE %d(%d) from %s\n", - krx->krx_nob, nob, libcfs_nid2str(fromnid)); - goto done; - } - -#if KQSW_CKSUM - if (csum0 != msg->kqm_cksum) { - CERROR("Bad IMMEDIATE checksum %08x(%08x) from %s\n", - csum0, msg->kqm_cksum, libcfs_nid2str(fromnid)); - CERROR("nob %d (%d)\n", krx->krx_nob, msg->kqm_nob); - goto done; - } -#endif - rc = lnet_parse(ni, &msg->kqm_u.immediate.kqim_hdr, - fromnid, krx, 0); - if (rc < 0) - goto done; - return; - - case QSWLND_MSG_RDMA: - if (!krx->krx_rpc_reply_needed) { - /* Should have been a simple message */ - CERROR("RDMA sent as simple message from %s\n", - libcfs_nid2str(fromnid)); - goto done; - } - - nob = offsetof(kqswnal_msg_t, - kqm_u.rdma.kqrm_rmd.kqrmd_frag[0]); - if (krx->krx_nob < nob) { - CERROR("Short RDMA message %d(%d) from %s\n", - krx->krx_nob, nob, libcfs_nid2str(fromnid)); - goto done; - } - - if (swab) - __swab32s(&msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag); - - n = msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag; - nob = offsetof(kqswnal_msg_t, - kqm_u.rdma.kqrm_rmd.kqrmd_frag[n]); - - if (krx->krx_nob < nob) { - CERROR("short RDMA message %d(%d) from %s\n", - krx->krx_nob, nob, libcfs_nid2str(fromnid)); - goto done; - } - - if (swab) { - for (i = 0; i < n; i++) { - EP_NMD *nmd = 
&msg->kqm_u.rdma.kqrm_rmd.kqrmd_frag[i]; - - __swab32s(&nmd->nmd_addr); - __swab32s(&nmd->nmd_len); - __swab32s(&nmd->nmd_attr); - } - } - -#if KQSW_CKSUM - krx->krx_cksum = csum0; /* stash checksum so far */ -#endif - rc = lnet_parse(ni, &msg->kqm_u.rdma.kqrm_hdr, - fromnid, krx, 1); - if (rc < 0) - goto done; - return; - } - /* Not Reached */ - } - - if (msg->kqm_magic == LNET_PROTO_MAGIC || - msg->kqm_magic == __swab32(LNET_PROTO_MAGIC)) { - /* Future protocol version compatibility support! - * When LNET unifies protocols over all LNDs, the first thing a - * peer will send will be a version query RPC. - * 1.4.6 and 1.4.7.early reply with a status block containing - * LNET_PROTO_QSW_MAGIC.. - * Later versions send a failure (-ve) status + - * magic/version */ - - if (!krx->krx_rpc_reply_needed) { - CERROR("Unexpected magic %08x from %s\n", - msg->kqm_magic, libcfs_nid2str(fromnid)); - goto done; - } - - LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO); - goto done; - } - - if (the_lnet.ln_ptlcompat != 0) { - /* Portals compatibility (strong or weak) - * This could be an unencapsulated LNET header. 
If it's big - * enough, let LNET's parser sort it out */ - - if (krx->krx_nob < sizeof(lnet_hdr_t)) { - CERROR("Short portals-compatible message from %s\n", - libcfs_nid2str(fromnid)); - goto done; - } - - krx->krx_raw_lnet_hdr = 1; - rc = lnet_parse(ni, (lnet_hdr_t *)msg, - fromnid, krx, krx->krx_rpc_reply_needed); - if (rc < 0) - goto done; - return; - } - - CERROR("Unrecognised magic %08x from %s\n", - msg->kqm_magic, libcfs_nid2str(fromnid)); - done: - kqswnal_rx_decref(krx); -} - -/* Receive Interrupt Handler: posts to schedulers */ -void -kqswnal_rxhandler(EP_RXD *rxd) -{ - unsigned long flags; - int nob = ep_rxd_len (rxd); - int status = ep_rxd_status (rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd); - CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n", - rxd, krx, nob, status); - - LASSERT (krx != NULL); - LASSERT (krx->krx_state == KRX_POSTED); - - krx->krx_state = KRX_PARSE; - krx->krx_rxd = rxd; - krx->krx_nob = nob; - krx->krx_raw_lnet_hdr = 0; - - /* RPC reply iff rpc request received without error */ - krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd) && - (status == EP_SUCCESS || - status == EP_MSG_TOO_BIG); - - /* Default to failure if an RPC reply is requested but not handled */ - krx->krx_rpc_reply.msg.status = -EPROTO; - atomic_set (&krx->krx_refcount, 1); - - if (status != EP_SUCCESS) { - /* receives complete with failure when receiver is removed */ - if (status == EP_SHUTDOWN) - LASSERT (kqswnal_data.kqn_shuttingdown); - else - CERROR("receive status failed with status %d nob %d\n", - ep_rxd_status(rxd), nob); - kqswnal_rx_decref(krx); - return; - } - - if (!in_interrupt()) { - kqswnal_parse(krx); - return; - } - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds); - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -} - -int -kqswnal_recv (lnet_ni_t *ni, - void *private, - lnet_msg_t 
*lntmsg, - int delayed, - unsigned int niov, - struct iovec *iov, - lnet_kiov_t *kiov, - unsigned int offset, - unsigned int mlen, - unsigned int rlen) -{ - kqswnal_rx_t *krx = (kqswnal_rx_t *)private; - lnet_nid_t fromnid; - kqswnal_msg_t *msg; - lnet_hdr_t *hdr; - kqswnal_remotemd_t *rmd; - int msg_offset; - int rc; - - LASSERT (!in_interrupt ()); /* OK to map */ - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - fromnid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ep_rxd_node(krx->krx_rxd)); - msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page); - - if (krx->krx_rpc_reply_needed) { - /* optimized (rdma) request sent as RPC */ - - if (krx->krx_raw_lnet_hdr) { - LASSERT (the_lnet.ln_ptlcompat != 0); - hdr = (lnet_hdr_t *)msg; - rmd = kqswnal_get_portalscompat_rmd(krx); - if (rmd == NULL) - return (-EPROTO); - } else { - LASSERT (msg->kqm_type == QSWLND_MSG_RDMA); - hdr = &msg->kqm_u.rdma.kqrm_hdr; - rmd = &msg->kqm_u.rdma.kqrm_rmd; - } - - /* NB header is still in wire byte order */ - - switch (le32_to_cpu(hdr->type)) { - case LNET_MSG_PUT: - case LNET_MSG_REPLY: - /* This is an optimized PUT/REPLY */ - rc = kqswnal_rdma(krx, lntmsg, - KTX_RDMA_FETCH, rmd, - niov, iov, kiov, offset, mlen); - break; - - case LNET_MSG_GET: -#if KQSW_CKSUM - if (krx->krx_cksum != msg->kqm_cksum) { - CERROR("Bad GET checksum %08x(%08x) from %s\n", - krx->krx_cksum, msg->kqm_cksum, - libcfs_nid2str(fromnid)); - rc = -EIO; - break; - } -#endif - if (lntmsg == NULL) { - /* No buffer match: my decref will - * complete the RPC with failure */ - rc = 0; - } else { - /* Matched something! 
*/ - rc = kqswnal_rdma(krx, lntmsg, - KTX_RDMA_STORE, rmd, - lntmsg->msg_niov, - lntmsg->msg_iov, - lntmsg->msg_kiov, - lntmsg->msg_offset, - lntmsg->msg_len); - } - break; - - default: - CERROR("Bad RPC type %d\n", - le32_to_cpu(hdr->type)); - rc = -EPROTO; - break; - } - - kqswnal_rx_decref(krx); - return rc; - } - - if (krx->krx_raw_lnet_hdr) { - LASSERT (the_lnet.ln_ptlcompat != 0); - msg_offset = sizeof(lnet_hdr_t); - } else { - LASSERT (msg->kqm_type == QSWLND_MSG_IMMEDIATE); - msg_offset = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload); - } - - if (krx->krx_nob < msg_offset + rlen) { - CERROR("Bad message size from %s: have %d, need %d + %d\n", - libcfs_nid2str(fromnid), krx->krx_nob, - msg_offset, rlen); - kqswnal_rx_decref(krx); - return -EPROTO; - } - - if (kiov != NULL) - lnet_copy_kiov2kiov(niov, kiov, offset, - krx->krx_npages, krx->krx_kiov, - msg_offset, mlen); - else - lnet_copy_kiov2iov(niov, iov, offset, - krx->krx_npages, krx->krx_kiov, - msg_offset, mlen); - - lnet_finalize(ni, lntmsg, 0); - kqswnal_rx_decref(krx); - return 0; -} - -int -kqswnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kqswnal_data.kqn_nthreads); - return (0); -} - -void -kqswnal_thread_fini (void) -{ - atomic_dec (&kqswnal_data.kqn_nthreads); -} - -int -kqswnal_scheduler (void *arg) -{ - kqswnal_rx_t *krx; - kqswnal_tx_t *ktx; - unsigned long flags; - int rc; - int counter = 0; - int did_something; - - cfs_daemonize ("kqswnal_sched"); - cfs_block_allsigs (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - for (;;) - { - did_something = 0; - - if (!list_empty (&kqswnal_data.kqn_readyrxds)) - { - krx = list_entry(kqswnal_data.kqn_readyrxds.next, - kqswnal_rx_t, krx_list); - list_del (&krx->krx_list); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - LASSERT (krx->krx_state == KRX_PARSE); - kqswnal_parse (krx); - - did_something = 1; - 
spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_donetxds)) - { - ktx = list_entry(kqswnal_data.kqn_donetxds.next, - kqswnal_tx_t, ktx_schedlist); - list_del_init (&ktx->ktx_schedlist); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - kqswnal_tx_done_in_thread_context(ktx); - - did_something = 1; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_delayedtxds)) - { - ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, - kqswnal_tx_t, ktx_schedlist); - list_del_init (&ktx->ktx_schedlist); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - rc = kqswnal_launch (ktx); - if (rc != 0) { - CERROR("Failed delayed transmit to %s: %d\n", - libcfs_nid2str(ktx->ktx_nid), rc); - kqswnal_tx_done (ktx, rc); - } - atomic_dec (&kqswnal_data.kqn_pending_txs); - - did_something = 1; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - /* nothing to do or hogging CPU */ - if (!did_something || counter++ == KQSW_RESCHED) { - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - counter = 0; - - if (!did_something) { - if (kqswnal_data.kqn_shuttingdown == 2) { - /* We only exit in stage 2 of shutdown when - * there's nothing left to do */ - break; - } - rc = wait_event_interruptible_exclusive ( - kqswnal_data.kqn_sched_waitq, - kqswnal_data.kqn_shuttingdown == 2 || - !list_empty(&kqswnal_data.kqn_readyrxds) || - !list_empty(&kqswnal_data.kqn_donetxds) || - !list_empty(&kqswnal_data.kqn_delayedtxds)); - LASSERT (rc == 0); - } else if (need_resched()) - schedule (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - } - - kqswnal_thread_fini (); - return (0); -} diff --git a/lnet/klnds/qswlnd/qswlnd_modparams.c b/lnet/klnds/qswlnd/qswlnd_modparams.c deleted file mode 100644 index 8c92d7c49da0bdf154186318f3dd7066f01b5db1..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd_modparams.c +++ 
/dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (C) 2002-2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswlnd.h" - -static int tx_maxcontig = (1<<10); -CFS_MODULE_PARM(tx_maxcontig, "i", int, 0444, - "maximum payload to de-fragment"); - -static int ntxmsgs = 512; -CFS_MODULE_PARM(ntxmsgs, "i", int, 0444, - "# tx msg buffers"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# per-peer concurrent sends"); - -static int nrxmsgs_large = 64; -CFS_MODULE_PARM(nrxmsgs_large, "i", int, 0444, - "# 'large' rx msg buffers"); - -static int ep_envelopes_large = 256; -CFS_MODULE_PARM(ep_envelopes_large, "i", int, 0444, - "# 'large' rx msg envelope buffers"); - -static int nrxmsgs_small = 256; -CFS_MODULE_PARM(nrxmsgs_small, "i", int, 0444, - "# 'small' rx msg buffers"); - -static int ep_envelopes_small = 2048; -CFS_MODULE_PARM(ep_envelopes_small, "i", int, 0444, - "# 'small' rx msg envelope buffers"); - -static int optimized_puts = (32<<10); -CFS_MODULE_PARM(optimized_puts, "i", int, 0644, - "zero-copy puts >= this size"); - -static int optimized_gets = 2048; -CFS_MODULE_PARM(optimized_gets, "i", int, 0644, - 
"zero-copy gets >= this size"); - -#if KQSW_CKSUM -static int inject_csum_error = 0; -CFS_MODULE_PARM(inject_csum_error, "i", int, 0644, - "test checksumming"); -#endif - -kqswnal_tunables_t kqswnal_tunables = { - .kqn_tx_maxcontig = &tx_maxcontig, - .kqn_ntxmsgs = &ntxmsgs, - .kqn_credits = &credits, - .kqn_peercredits = &peer_credits, - .kqn_nrxmsgs_large = &nrxmsgs_large, - .kqn_ep_envelopes_large = &ep_envelopes_large, - .kqn_nrxmsgs_small = &nrxmsgs_small, - .kqn_ep_envelopes_small = &ep_envelopes_small, - .kqn_optimized_puts = &optimized_puts, - .kqn_optimized_gets = &optimized_gets, -#if KQSW_CKSUM - .kqn_inject_csum_error = &inject_csum_error, -#endif -}; - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM -static cfs_sysctl_table_t kqswnal_ctl_table[] = { - { - .ctl_name = 1, - .procname = "tx_maxcontig", - .data = &tx_maxcontig, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 2, - .procname = "ntxmsgs", - .data = &ntxmsgs, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 3, - .procname = "credits", - .data = &credits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "peer_credits", - .data = &peer_credits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 5, - .procname = "nrxmsgs_large", - .data = &nrxmsgs_large, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "ep_envelopes_large", - .data = &ep_envelopes_large, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "nrxmsgs_small", - .data = &nrxmsgs_small, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 8, - .procname = "ep_envelopes_small", - .data = &ep_envelopes_small, - .maxlen = sizeof (int), - .mode = 0444, - 
.proc_handler = &proc_dointvec - }, - { - .ctl_name = 9, - .procname = "optimized_puts", - .data = &optimized_puts, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 10, - .procname = "optimized_gets", - .data = &optimized_gets, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#if KQSW_CKSUM - { - .ctl_name = 11, - .procname = "inject_csum_error", - .data = &inject_csum_error, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#endif - {0} -}; - -static cfs_sysctl_table_t kqswnal_top_ctl_table[] = { - { - .ctl_name = 201, - .procname = "qswnal", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kqswnal_ctl_table - }, - {0} -}; - -int -kqswnal_tunables_init () -{ - kqswnal_tunables.kqn_sysctl = - cfs_register_sysctl_table(kqswnal_top_ctl_table, 0); - - if (kqswnal_tunables.kqn_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kqswnal_tunables_fini () -{ - if (kqswnal_tunables.kqn_sysctl != NULL) - cfs_unregister_sysctl_table(kqswnal_tunables.kqn_sysctl); -} -#else -int -kqswnal_tunables_init () -{ - return 0; -} - -void -kqswnal_tunables_fini () -{ -} -#endif diff --git a/lnet/klnds/ralnd/.cvsignore b/lnet/klnds/ralnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/ralnd/Makefile.in b/lnet/klnds/ralnd/Makefile.in deleted file mode 100644 index e1f5e8242e6fff49e36014d00cfa2d491d511528..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kralnd -kralnd-objs := ralnd.o ralnd_cb.o ralnd_modparams.o - -EXTRA_POST_CFLAGS := @RACPPFLAGS@ - -@INCLUDE_RULES@ diff --git 
a/lnet/klnds/ralnd/autoMakefile.am b/lnet/klnds/ralnd/autoMakefile.am deleted file mode 100644 index 7f3df4c432d71b4596a69a4be76eb5eb609b7530..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_RALND -modulenet_DATA = kralnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kralnd-objs:%.o=%.c) ralnd.h diff --git a/lnet/klnds/ralnd/ralnd.c b/lnet/klnds/ralnd/ralnd.c deleted file mode 100644 index a0a4d93f164862731f788505c3457625b3913538..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd.c +++ /dev/null @@ -1,1741 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#include "ralnd.h" - -static int kranal_devids[RANAL_MAXDEVS] = {RAPK_MAIN_DEVICE_ID, - RAPK_EXPANSION_DEVICE_ID}; - -lnd_t the_kralnd = { - .lnd_type = RALND, - .lnd_startup = kranal_startup, - .lnd_shutdown = kranal_shutdown, - .lnd_ctl = kranal_ctl, - .lnd_send = kranal_send, - .lnd_recv = kranal_recv, - .lnd_eager_recv = kranal_eager_recv, - .lnd_accept = kranal_accept, -}; - -kra_data_t kranal_data; - -void -kranal_pack_connreq(kra_connreq_t *connreq, kra_conn_t *conn, lnet_nid_t dstnid) -{ - RAP_RETURN rrc; - - memset(connreq, 0, sizeof(*connreq)); - - connreq->racr_magic = RANAL_MSG_MAGIC; - connreq->racr_version = RANAL_MSG_VERSION; - - if (conn == NULL) /* prepping a "stub" reply */ - return; - - connreq->racr_devid = conn->rac_device->rad_id; - connreq->racr_srcnid = lnet_ptlcompat_srcnid(kranal_data.kra_ni->ni_nid, - dstnid); - connreq->racr_dstnid = dstnid; - connreq->racr_peerstamp = kranal_data.kra_peerstamp; - connreq->racr_connstamp = conn->rac_my_connstamp; - connreq->racr_timeout = conn->rac_timeout; - - rrc = RapkGetRiParams(conn->rac_rihandle, &connreq->racr_riparams); - LASSERT(rrc == RAP_SUCCESS); -} - -int -kranal_recv_connreq(struct socket *sock, kra_connreq_t *connreq, int active) -{ - int timeout = active ? *kranal_tunables.kra_timeout : - lnet_acceptor_timeout(); - int swab; - int rc; - - /* return 0 on success, -ve on error, +ve to tell the peer I'm "old" */ - - rc = libcfs_sock_read(sock, &connreq->racr_magic, - sizeof(connreq->racr_magic), timeout); - if (rc != 0) { - CERROR("Read(magic) failed(1): %d\n", rc); - return -EIO; - } - - if (connreq->racr_magic != RANAL_MSG_MAGIC && - connreq->racr_magic != __swab32(RANAL_MSG_MAGIC)) { - /* Unexpected magic! */ - if (!active && - the_lnet.ln_ptlcompat == 0 && - (connreq->racr_magic == LNET_PROTO_MAGIC || - connreq->racr_magic == __swab32(LNET_PROTO_MAGIC))) { - /* future protocol version compatibility! 
- * When LNET unifies protocols over all LNDs, the first - * thing sent will be a version query. +ve rc means I - * reply with my current magic/version */ - return EPROTO; - } - - if (active || - the_lnet.ln_ptlcompat == 0) { - CERROR("Unexpected magic %08x (1)\n", - connreq->racr_magic); - return -EPROTO; - } - - /* When portals compatibility is set, I may be passed a new - * connection "blindly" by the acceptor, and I have to - * determine if my peer has sent an acceptor connection request - * or not. This isn't a connreq, so I'll get the acceptor to - * look at it... */ - rc = lnet_accept(kranal_data.kra_ni, sock, connreq->racr_magic); - if (rc != 0) - return -EPROTO; - - /* ...and if it's OK I'm back to looking for a connreq... */ - rc = libcfs_sock_read(sock, &connreq->racr_magic, - sizeof(connreq->racr_magic), timeout); - if (rc != 0) { - CERROR("Read(magic) failed(2): %d\n", rc); - return -EIO; - } - - if (connreq->racr_magic != RANAL_MSG_MAGIC && - connreq->racr_magic != __swab32(RANAL_MSG_MAGIC)) { - CERROR("Unexpected magic %08x(2)\n", - connreq->racr_magic); - return -EPROTO; - } - } - - swab = (connreq->racr_magic == __swab32(RANAL_MSG_MAGIC)); - - rc = libcfs_sock_read(sock, &connreq->racr_version, - sizeof(connreq->racr_version), timeout); - if (rc != 0) { - CERROR("Read(version) failed: %d\n", rc); - return -EIO; - } - - if (swab) - __swab16s(&connreq->racr_version); - - if (connreq->racr_version != RANAL_MSG_VERSION) { - if (active) { - CERROR("Unexpected version %d\n", connreq->racr_version); - return -EPROTO; - } - /* If this is a future version of the ralnd protocol, and I'm - * passive (accepted the connection), tell my peer I'm "old" - * (+ve rc) */ - return EPROTO; - } - - rc = libcfs_sock_read(sock, &connreq->racr_devid, - sizeof(connreq->racr_version) - - offsetof(kra_connreq_t, racr_devid), - timeout); - if (rc != 0) { - CERROR("Read(body) failed: %d\n", rc); - return -EIO; - } - - if (swab) { - __swab32s(&connreq->racr_magic); - 
__swab16s(&connreq->racr_version); - __swab16s(&connreq->racr_devid); - __swab64s(&connreq->racr_srcnid); - __swab64s(&connreq->racr_dstnid); - __swab64s(&connreq->racr_peerstamp); - __swab64s(&connreq->racr_connstamp); - __swab32s(&connreq->racr_timeout); - - __swab32s(&connreq->racr_riparams.HostId); - __swab32s(&connreq->racr_riparams.FmaDomainHndl); - __swab32s(&connreq->racr_riparams.PTag); - __swab32s(&connreq->racr_riparams.CompletionCookie); - } - - if (connreq->racr_srcnid == LNET_NID_ANY || - connreq->racr_dstnid == LNET_NID_ANY) { - CERROR("Received LNET_NID_ANY\n"); - return -EPROTO; - } - - if (connreq->racr_timeout < RANAL_MIN_TIMEOUT) { - CERROR("Received timeout %d < MIN %d\n", - connreq->racr_timeout, RANAL_MIN_TIMEOUT); - return -EPROTO; - } - - return 0; -} - -int -kranal_close_stale_conns_locked (kra_peer_t *peer, kra_conn_t *newconn) -{ - kra_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int loopback; - int count = 0; - - loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid; - - list_for_each_safe (ctmp, cnxt, &peer->rap_conns) { - conn = list_entry(ctmp, kra_conn_t, rac_list); - - if (conn == newconn) - continue; - - if (conn->rac_peerstamp != newconn->rac_peerstamp) { - CDEBUG(D_NET, "Closing stale conn nid: %s " - " peerstamp:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->rap_nid), - conn->rac_peerstamp, newconn->rac_peerstamp); - LASSERT (conn->rac_peerstamp < newconn->rac_peerstamp); - count++; - kranal_close_conn_locked(conn, -ESTALE); - continue; - } - - if (conn->rac_device != newconn->rac_device) - continue; - - if (loopback && - newconn->rac_my_connstamp == conn->rac_peer_connstamp && - newconn->rac_peer_connstamp == conn->rac_my_connstamp) - continue; - - LASSERT (conn->rac_peer_connstamp < newconn->rac_peer_connstamp); - - CDEBUG(D_NET, "Closing stale conn nid: %s" - " connstamp:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->rap_nid), - conn->rac_peer_connstamp, newconn->rac_peer_connstamp); - - count++; - 
kranal_close_conn_locked(conn, -ESTALE); - } - - return count; -} - -int -kranal_conn_isdup_locked(kra_peer_t *peer, kra_conn_t *newconn) -{ - kra_conn_t *conn; - struct list_head *tmp; - int loopback; - - loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid; - - list_for_each(tmp, &peer->rap_conns) { - conn = list_entry(tmp, kra_conn_t, rac_list); - - /* 'newconn' is from an earlier version of 'peer'!!! */ - if (newconn->rac_peerstamp < conn->rac_peerstamp) - return 1; - - /* 'conn' is from an earlier version of 'peer': it will be - * removed when we cull stale conns later on... */ - if (newconn->rac_peerstamp > conn->rac_peerstamp) - continue; - - /* Different devices are OK */ - if (conn->rac_device != newconn->rac_device) - continue; - - /* It's me connecting to myself */ - if (loopback && - newconn->rac_my_connstamp == conn->rac_peer_connstamp && - newconn->rac_peer_connstamp == conn->rac_my_connstamp) - continue; - - /* 'newconn' is an earlier connection from 'peer'!!! */ - if (newconn->rac_peer_connstamp < conn->rac_peer_connstamp) - return 2; - - /* 'conn' is an earlier connection from 'peer': it will be - * removed when we cull stale conns later on... */ - if (newconn->rac_peer_connstamp > conn->rac_peer_connstamp) - continue; - - /* 'newconn' has the SAME connection stamp; 'peer' isn't - * playing the game... 
*/ - return 3; - } - - return 0; -} - -void -kranal_set_conn_uniqueness (kra_conn_t *conn) -{ - unsigned long flags; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - conn->rac_my_connstamp = kranal_data.kra_connstamp++; - - do { /* allocate a unique cqid */ - conn->rac_cqid = kranal_data.kra_next_cqid++; - } while (kranal_cqid2conn_locked(conn->rac_cqid) != NULL); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); -} - -int -kranal_create_conn(kra_conn_t **connp, kra_device_t *dev) -{ - kra_conn_t *conn; - RAP_RETURN rrc; - - LASSERT (!in_interrupt()); - LIBCFS_ALLOC(conn, sizeof(*conn)); - - if (conn == NULL) - return -ENOMEM; - - memset(conn, 0, sizeof(*conn)); - atomic_set(&conn->rac_refcount, 1); - INIT_LIST_HEAD(&conn->rac_list); - INIT_LIST_HEAD(&conn->rac_hashlist); - INIT_LIST_HEAD(&conn->rac_schedlist); - INIT_LIST_HEAD(&conn->rac_fmaq); - INIT_LIST_HEAD(&conn->rac_rdmaq); - INIT_LIST_HEAD(&conn->rac_replyq); - spin_lock_init(&conn->rac_lock); - - kranal_set_conn_uniqueness(conn); - - conn->rac_device = dev; - conn->rac_timeout = MAX(*kranal_tunables.kra_timeout, RANAL_MIN_TIMEOUT); - kranal_update_reaper_timeout(conn->rac_timeout); - - rrc = RapkCreateRi(dev->rad_handle, conn->rac_cqid, - &conn->rac_rihandle); - if (rrc != RAP_SUCCESS) { - CERROR("RapkCreateRi failed: %d\n", rrc); - LIBCFS_FREE(conn, sizeof(*conn)); - return -ENETDOWN; - } - - atomic_inc(&kranal_data.kra_nconns); - *connp = conn; - return 0; -} - -void -kranal_destroy_conn(kra_conn_t *conn) -{ - RAP_RETURN rrc; - - LASSERT (!in_interrupt()); - LASSERT (!conn->rac_scheduled); - LASSERT (list_empty(&conn->rac_list)); - LASSERT (list_empty(&conn->rac_hashlist)); - LASSERT (list_empty(&conn->rac_schedlist)); - LASSERT (atomic_read(&conn->rac_refcount) == 0); - LASSERT (list_empty(&conn->rac_fmaq)); - LASSERT (list_empty(&conn->rac_rdmaq)); - LASSERT (list_empty(&conn->rac_replyq)); - - rrc = RapkDestroyRi(conn->rac_device->rad_handle, - conn->rac_rihandle); - 
LASSERT (rrc == RAP_SUCCESS); - - if (conn->rac_peer != NULL) - kranal_peer_decref(conn->rac_peer); - - LIBCFS_FREE(conn, sizeof(*conn)); - atomic_dec(&kranal_data.kra_nconns); -} - -void -kranal_terminate_conn_locked (kra_conn_t *conn) -{ - LASSERT (!in_interrupt()); - LASSERT (conn->rac_state == RANAL_CONN_CLOSING); - LASSERT (!list_empty(&conn->rac_hashlist)); - LASSERT (list_empty(&conn->rac_list)); - - /* Remove from conn hash table: no new callbacks */ - list_del_init(&conn->rac_hashlist); - kranal_conn_decref(conn); - - conn->rac_state = RANAL_CONN_CLOSED; - - /* schedule to clear out all uncompleted comms in context of dev's - * scheduler */ - kranal_schedule_conn(conn); -} - -void -kranal_close_conn_locked (kra_conn_t *conn, int error) -{ - kra_peer_t *peer = conn->rac_peer; - - CDEBUG(error == 0 ? D_NET : D_NETERROR, - "closing conn to %s: error %d\n", - libcfs_nid2str(peer->rap_nid), error); - - LASSERT (!in_interrupt()); - LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED); - LASSERT (!list_empty(&conn->rac_hashlist)); - LASSERT (!list_empty(&conn->rac_list)); - - list_del_init(&conn->rac_list); - - if (list_empty(&peer->rap_conns) && - peer->rap_persistence == 0) { - /* Non-persistent peer with no more conns... */ - kranal_unlink_peer_locked(peer); - } - - /* Reset RX timeout to ensure we wait for an incoming CLOSE for the - * full timeout. If we get a CLOSE we know the peer has stopped all - * RDMA. Otherwise if we wait for the full timeout we can also be sure - * all RDMA has stopped. 
*/ - conn->rac_last_rx = jiffies; - mb(); - - conn->rac_state = RANAL_CONN_CLOSING; - kranal_schedule_conn(conn); /* schedule sending CLOSE */ - - kranal_conn_decref(conn); /* lose peer's ref */ -} - -void -kranal_close_conn (kra_conn_t *conn, int error) -{ - unsigned long flags; - - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (conn->rac_state == RANAL_CONN_ESTABLISHED) - kranal_close_conn_locked(conn, error); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); -} - -int -kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq, - __u32 peer_ip, int peer_port) -{ - kra_device_t *dev = conn->rac_device; - unsigned long flags; - RAP_RETURN rrc; - - /* CAVEAT EMPTOR: we're really overloading rac_last_tx + rac_keepalive - * to do RapkCompleteSync() timekeeping (see kibnal_scheduler). */ - conn->rac_last_tx = jiffies; - conn->rac_keepalive = 0; - - rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams); - if (rrc != RAP_SUCCESS) { - CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rrc); - return -ECONNABORTED; - } - - /* Schedule conn on rad_new_conns */ - kranal_conn_addref(conn); - spin_lock_irqsave(&dev->rad_lock, flags); - list_add_tail(&conn->rac_schedlist, &dev->rad_new_conns); - wake_up(&dev->rad_waitq); - spin_unlock_irqrestore(&dev->rad_lock, flags); - - rrc = RapkWaitToConnect(conn->rac_rihandle); - if (rrc != RAP_SUCCESS) { - CERROR("Error waiting to connect to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rrc); - return -ECONNABORTED; - } - - /* Scheduler doesn't touch conn apart from to deschedule and decref it - * after RapkCompleteSync() return success, so conn is all mine */ - - conn->rac_peerstamp = connreq->racr_peerstamp; - conn->rac_peer_connstamp = connreq->racr_connstamp; - conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout); - kranal_update_reaper_timeout(conn->rac_keepalive); - return 0; -} - -int -kranal_passive_conn_handshake 
(struct socket *sock, lnet_nid_t *src_nidp, - lnet_nid_t *dst_nidp, kra_conn_t **connp) -{ - __u32 peer_ip; - unsigned int peer_port; - kra_connreq_t rx_connreq; - kra_connreq_t tx_connreq; - kra_conn_t *conn; - kra_device_t *dev; - int rc; - int i; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - if (rc != 0) { - CERROR("Can't get peer's IP: %d\n", rc); - return rc; - } - - rc = kranal_recv_connreq(sock, &rx_connreq, 0); - - if (rc < 0) { - CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rc); - return rc; - } - - if (rc > 0) { - /* Request from "new" peer: send reply with my MAGIC/VERSION to - * tell her I'm old... */ - kranal_pack_connreq(&tx_connreq, NULL, LNET_NID_ANY); - - rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq), - lnet_acceptor_timeout()); - if (rc != 0) - CERROR("Can't tx stub connreq to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rc); - - return -EPROTO; - } - - for (i = 0;;i++) { - if (i == kranal_data.kra_ndevs) { - CERROR("Can't match dev %d from %u.%u.%u.%u/%d\n", - rx_connreq.racr_devid, HIPQUAD(peer_ip), peer_port); - return -ENODEV; - } - dev = &kranal_data.kra_devices[i]; - if (dev->rad_id == rx_connreq.racr_devid) - break; - } - - rc = kranal_create_conn(&conn, dev); - if (rc != 0) - return rc; - - kranal_pack_connreq(&tx_connreq, conn, rx_connreq.racr_srcnid); - - rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rc); - kranal_conn_decref(conn); - return rc; - } - - rc = kranal_set_conn_params(conn, &rx_connreq, peer_ip, peer_port); - if (rc != 0) { - kranal_conn_decref(conn); - return rc; - } - - *connp = conn; - *src_nidp = rx_connreq.racr_srcnid; - *dst_nidp = rx_connreq.racr_dstnid; - return 0; -} - -int -kranal_active_conn_handshake(kra_peer_t *peer, - lnet_nid_t *dst_nidp, kra_conn_t **connp) -{ - kra_connreq_t connreq; - 
kra_conn_t *conn; - kra_device_t *dev; - struct socket *sock; - int rc; - unsigned int idx; - - /* spread connections over all devices using both peer NIDs to ensure - * all nids use all devices */ - idx = peer->rap_nid + kranal_data.kra_ni->ni_nid; - dev = &kranal_data.kra_devices[idx % kranal_data.kra_ndevs]; - - rc = kranal_create_conn(&conn, dev); - if (rc != 0) - return rc; - - kranal_pack_connreq(&connreq, conn, peer->rap_nid); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto test */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - connreq.racr_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - connreq.racr_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - rc = lnet_connect(&sock, peer->rap_nid, - 0, peer->rap_ip, peer->rap_port); - if (rc != 0) - goto failed_0; - - /* CAVEAT EMPTOR: the passive side receives with a SHORT rx timeout - * immediately after accepting a connection, so we connect and then - * send immediately. 
*/ - - rc = libcfs_sock_write(sock, &connreq, sizeof(connreq), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer->rap_ip), peer->rap_port, rc); - goto failed_2; - } - - rc = kranal_recv_connreq(sock, &connreq, 1); - if (rc != 0) { - CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer->rap_ip), peer->rap_port, rc); - goto failed_2; - } - - libcfs_sock_release(sock); - rc = -EPROTO; - - if (connreq.racr_srcnid != peer->rap_nid) { - CERROR("Unexpected srcnid from %u.%u.%u.%u/%d: " - "received %s expected %s\n", - HIPQUAD(peer->rap_ip), peer->rap_port, - libcfs_nid2str(connreq.racr_srcnid), - libcfs_nid2str(peer->rap_nid)); - goto failed_1; - } - - if (connreq.racr_devid != dev->rad_id) { - CERROR("Unexpected device id from %u.%u.%u.%u/%d: " - "received %d expected %d\n", - HIPQUAD(peer->rap_ip), peer->rap_port, - connreq.racr_devid, dev->rad_id); - goto failed_1; - } - - rc = kranal_set_conn_params(conn, &connreq, - peer->rap_ip, peer->rap_port); - if (rc != 0) - goto failed_1; - - *connp = conn; - *dst_nidp = connreq.racr_dstnid; - return 0; - - failed_2: - libcfs_sock_release(sock); - failed_1: - lnet_connect_console_error(rc, peer->rap_nid, - peer->rap_ip, peer->rap_port); - failed_0: - kranal_conn_decref(conn); - return rc; -} - -int -kranal_conn_handshake (struct socket *sock, kra_peer_t *peer) -{ - kra_peer_t *peer2; - kra_tx_t *tx; - lnet_nid_t peer_nid; - lnet_nid_t dst_nid; - unsigned long flags; - kra_conn_t *conn; - int rc; - int nstale; - int new_peer = 0; - - if (sock == NULL) { - /* active: connd wants to connect to 'peer' */ - LASSERT (peer != NULL); - LASSERT (peer->rap_connecting); - - rc = kranal_active_conn_handshake(peer, &dst_nid, &conn); - if (rc != 0) - return rc; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (!kranal_peer_active(peer)) { - /* raced with peer getting unlinked */ - write_unlock_irqrestore(&kranal_data.kra_global_lock, - flags); - 
kranal_conn_decref(conn); - return -ESTALE; - } - - peer_nid = peer->rap_nid; - } else { - /* passive: listener accepted 'sock' */ - LASSERT (peer == NULL); - - rc = kranal_passive_conn_handshake(sock, &peer_nid, - &dst_nid, &conn); - if (rc != 0) - return rc; - - /* assume this is a new peer */ - rc = kranal_create_peer(&peer, peer_nid); - if (rc != 0) { - CERROR("Can't create conn for %s\n", - libcfs_nid2str(peer_nid)); - kranal_conn_decref(conn); - return -ENOMEM; - } - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - peer2 = kranal_find_peer_locked(peer_nid); - if (peer2 == NULL) { - new_peer = 1; - } else { - /* peer_nid already in the peer table */ - kranal_peer_decref(peer); - peer = peer2; - } - } - - LASSERT ((!new_peer) != (!kranal_peer_active(peer))); - - /* Refuse connection if peer thinks we are a different NID. We check - * this while holding the global lock, to synch with connection - * destruction on NID change. */ - if (!lnet_ptlcompat_matchnid(kranal_data.kra_ni->ni_nid, dst_nid)) { - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - CERROR("Stale/bad connection with %s: dst_nid %s, expected %s\n", - libcfs_nid2str(peer_nid), libcfs_nid2str(dst_nid), - libcfs_nid2str(kranal_data.kra_ni->ni_nid)); - rc = -ESTALE; - goto failed; - } - - /* Refuse to duplicate an existing connection (both sides might try to - * connect at once). NB we return success! We _are_ connected so we - * _don't_ have any blocked txs to complete with failure. 
*/ - rc = kranal_conn_isdup_locked(peer, conn); - if (rc != 0) { - LASSERT (!list_empty(&peer->rap_conns)); - LASSERT (list_empty(&peer->rap_tx_queue)); - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - CWARN("Not creating duplicate connection to %s: %d\n", - libcfs_nid2str(peer_nid), rc); - rc = 0; - goto failed; - } - - if (new_peer) { - /* peer table takes my ref on the new peer */ - list_add_tail(&peer->rap_list, - kranal_nid2peerlist(peer_nid)); - } - - /* initialise timestamps before reaper looks at them */ - conn->rac_last_tx = conn->rac_last_rx = jiffies; - - kranal_peer_addref(peer); /* +1 ref for conn */ - conn->rac_peer = peer; - list_add_tail(&conn->rac_list, &peer->rap_conns); - - kranal_conn_addref(conn); /* +1 ref for conn table */ - list_add_tail(&conn->rac_hashlist, - kranal_cqid2connlist(conn->rac_cqid)); - - /* Schedule all packets blocking for a connection */ - while (!list_empty(&peer->rap_tx_queue)) { - tx = list_entry(peer->rap_tx_queue.next, - kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_post_fma(conn, tx); - } - - nstale = kranal_close_stale_conns_locked(peer, conn); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - /* CAVEAT EMPTOR: passive peer can disappear NOW */ - - if (nstale != 0) - CWARN("Closed %d stale conns to %s\n", nstale, - libcfs_nid2str(peer_nid)); - - CWARN("New connection to %s on devid[%d] = %d\n", - libcfs_nid2str(peer_nid), - conn->rac_device->rad_idx, conn->rac_device->rad_id); - - /* Ensure conn gets checked. 
Transmits may have been queued and an - * FMA event may have happened before it got in the cq hash table */ - kranal_schedule_conn(conn); - return 0; - - failed: - if (new_peer) - kranal_peer_decref(peer); - kranal_conn_decref(conn); - return rc; -} - -void -kranal_connect (kra_peer_t *peer) -{ - kra_tx_t *tx; - unsigned long flags; - struct list_head zombies; - int rc; - - LASSERT (peer->rap_connecting); - - CDEBUG(D_NET, "About to handshake %s\n", - libcfs_nid2str(peer->rap_nid)); - - rc = kranal_conn_handshake(NULL, peer); - - CDEBUG(D_NET, "Done handshake %s:%d \n", - libcfs_nid2str(peer->rap_nid), rc); - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - LASSERT (peer->rap_connecting); - peer->rap_connecting = 0; - - if (rc == 0) { - /* kranal_conn_handshake() queues blocked txs immediately on - * success to avoid messages jumping the queue */ - LASSERT (list_empty(&peer->rap_tx_queue)); - - peer->rap_reconnect_interval = 0; /* OK to reconnect at any time */ - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - return; - } - - peer->rap_reconnect_interval *= 2; - peer->rap_reconnect_interval = - MAX(peer->rap_reconnect_interval, - *kranal_tunables.kra_min_reconnect_interval); - peer->rap_reconnect_interval = - MIN(peer->rap_reconnect_interval, - *kranal_tunables.kra_max_reconnect_interval); - - peer->rap_reconnect_time = jiffies + peer->rap_reconnect_interval * HZ; - - /* Grab all blocked packets while we have the global lock */ - list_add(&zombies, &peer->rap_tx_queue); - list_del_init(&peer->rap_tx_queue); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - if (list_empty(&zombies)) - return; - - CDEBUG(D_NETERROR, "Dropping packets for %s: connection failed\n", - libcfs_nid2str(peer->rap_nid)); - - do { - tx = list_entry(zombies.next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_tx_done(tx, -EHOSTUNREACH); - - } while (!list_empty(&zombies)); -} - -void -kranal_free_acceptsock (kra_acceptsock_t *ras) 
-{ - libcfs_sock_release(ras->ras_sock); - LIBCFS_FREE(ras, sizeof(*ras)); -} - -int -kranal_accept (lnet_ni_t *ni, struct socket *sock) -{ - kra_acceptsock_t *ras; - int rc; - __u32 peer_ip; - int peer_port; - unsigned long flags; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT (rc == 0); /* we succeeded before */ - - LIBCFS_ALLOC(ras, sizeof(*ras)); - if (ras == NULL) { - CERROR("ENOMEM allocating connection request from " - "%u.%u.%u.%u\n", HIPQUAD(peer_ip)); - return -ENOMEM; - } - - ras->ras_sock = sock; - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - - list_add_tail(&ras->ras_list, &kranal_data.kra_connd_acceptq); - wake_up(&kranal_data.kra_connd_waitq); - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - return 0; -} - -int -kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid) -{ - kra_peer_t *peer; - unsigned long flags; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, sizeof(*peer)); - if (peer == NULL) - return -ENOMEM; - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->rap_nid = nid; - atomic_set(&peer->rap_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD(&peer->rap_list); - INIT_LIST_HEAD(&peer->rap_connd_list); - INIT_LIST_HEAD(&peer->rap_conns); - INIT_LIST_HEAD(&peer->rap_tx_queue); - - peer->rap_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (kranal_data.kra_nonewpeers) { - /* shutdown has started already */ - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - LIBCFS_FREE(peer, sizeof(*peer)); - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - atomic_inc(&kranal_data.kra_npeers); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - *peerp = peer; - return 0; -} - -void -kranal_destroy_peer (kra_peer_t *peer) -{ - CDEBUG(D_NET, "peer %s %p deleted\n", - libcfs_nid2str(peer->rap_nid), peer); - - LASSERT 
(atomic_read(&peer->rap_refcount) == 0); - LASSERT (peer->rap_persistence == 0); - LASSERT (!kranal_peer_active(peer)); - LASSERT (!peer->rap_connecting); - LASSERT (list_empty(&peer->rap_conns)); - LASSERT (list_empty(&peer->rap_tx_queue)); - LASSERT (list_empty(&peer->rap_connd_list)); - - LIBCFS_FREE(peer, sizeof(*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. */ - atomic_dec(&kranal_data.kra_npeers); -} - -kra_peer_t * -kranal_find_peer_locked (lnet_nid_t nid) -{ - struct list_head *peer_list = kranal_nid2peerlist(nid); - struct list_head *tmp; - kra_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry(tmp, kra_peer_t, rap_list); - - LASSERT (peer->rap_persistence > 0 || /* persistent peer */ - !list_empty(&peer->rap_conns)); /* active conn */ - - if (peer->rap_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), - atomic_read(&peer->rap_refcount)); - return peer; - } - return NULL; -} - -kra_peer_t * -kranal_find_peer (lnet_nid_t nid) -{ - kra_peer_t *peer; - - read_lock(&kranal_data.kra_global_lock); - peer = kranal_find_peer_locked(nid); - if (peer != NULL) /* +1 ref for caller? 
*/ - kranal_peer_addref(peer); - read_unlock(&kranal_data.kra_global_lock); - - return peer; -} - -void -kranal_unlink_peer_locked (kra_peer_t *peer) -{ - LASSERT (peer->rap_persistence == 0); - LASSERT (list_empty(&peer->rap_conns)); - - LASSERT (kranal_peer_active(peer)); - list_del_init(&peer->rap_list); - - /* lose peerlist's ref */ - kranal_peer_decref(peer); -} - -int -kranal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp, - int *persistencep) -{ - kra_peer_t *peer; - struct list_head *ptmp; - int i; - - read_lock(&kranal_data.kra_global_lock); - - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) { - - list_for_each(ptmp, &kranal_data.kra_peers[i]) { - - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->rap_nid; - *ipp = peer->rap_ip; - *portp = peer->rap_port; - *persistencep = peer->rap_persistence; - - read_unlock(&kranal_data.kra_global_lock); - return 0; - } - } - - read_unlock(&kranal_data.kra_global_lock); - return -ENOENT; -} - -int -kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port) -{ - unsigned long flags; - kra_peer_t *peer; - kra_peer_t *peer2; - int rc; - - if (nid == LNET_NID_ANY) - return -EINVAL; - - rc = kranal_create_peer(&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - peer2 = kranal_find_peer_locked(nid); - if (peer2 != NULL) { - kranal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail(&peer->rap_list, - kranal_nid2peerlist(nid)); - } - - peer->rap_ip = ip; - peer->rap_port = port; - peer->rap_persistence++; - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - return 0; -} - -void -kranal_del_peer_locked (kra_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kra_conn_t *conn; - - peer->rap_persistence = 0; - - if 
(list_empty(&peer->rap_conns)) { - kranal_unlink_peer_locked(peer); - } else { - list_for_each_safe(ctmp, cnxt, &peer->rap_conns) { - conn = list_entry(ctmp, kra_conn_t, rac_list); - - kranal_close_conn_locked(conn, 0); - } - /* peer unlinks itself when last conn is closed */ - } -} - -int -kranal_del_peer (lnet_nid_t nid) -{ - unsigned long flags; - struct list_head *ptmp; - struct list_head *pnxt; - kra_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers; - else { - lo = 0; - hi = kranal_data.kra_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) { - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - if (!(nid == LNET_NID_ANY || peer->rap_nid == nid)) - continue; - - kranal_del_peer_locked(peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - return rc; -} - -kra_conn_t * -kranal_get_conn_by_idx (int index) -{ - kra_peer_t *peer; - struct list_head *ptmp; - kra_conn_t *conn; - struct list_head *ctmp; - int i; - - read_lock (&kranal_data.kra_global_lock); - - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) { - list_for_each (ptmp, &kranal_data.kra_peers[i]) { - - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - list_for_each (ctmp, &peer->rap_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, kra_conn_t, rac_list); - CDEBUG(D_NET, "++conn[%p] -> %s (%d)\n", conn, - libcfs_nid2str(conn->rac_peer->rap_nid), - atomic_read(&conn->rac_refcount)); - atomic_inc(&conn->rac_refcount); - read_unlock(&kranal_data.kra_global_lock); - return conn; - } - } - } - - read_unlock(&kranal_data.kra_global_lock); - return NULL; 
-} - -int -kranal_close_peer_conns_locked (kra_peer_t *peer, int why) -{ - kra_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->rap_conns) { - conn = list_entry(ctmp, kra_conn_t, rac_list); - - count++; - kranal_close_conn_locked(conn, why); - } - - return count; -} - -int -kranal_close_matching_conns (lnet_nid_t nid) -{ - unsigned long flags; - kra_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers; - else { - lo = 0; - hi = kranal_data.kra_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) { - - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->rap_nid)) - continue; - - count += kranal_close_peer_conns_locked(peer, 0); - } - } - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return 0; - - return (count == 0) ? 
-ENOENT : 0; -} - -int -kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - LASSERT (ni == kranal_data.kra_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - __u32 ip = 0; - int port = 0; - int share_count = 0; - - rc = kranal_get_peer_info(data->ioc_count, - &nid, &ip, &port, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kranal_add_persistent_peer(data->ioc_nid, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kranal_del_peer(data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kra_conn_t *conn = kranal_get_conn_by_idx(data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - rc = 0; - data->ioc_nid = conn->rac_peer->rap_nid; - data->ioc_u32[0] = conn->rac_device->rad_id; - kranal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kranal_close_matching_conns(data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - return rc; -} - -void -kranal_free_txdescs(struct list_head *freelist) -{ - kra_tx_t *tx; - - while (!list_empty(freelist)) { - tx = list_entry(freelist->next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - LIBCFS_FREE(tx->tx_phys, LNET_MAX_IOV * sizeof(*tx->tx_phys)); - LIBCFS_FREE(tx, sizeof(*tx)); - } -} - -int -kranal_alloc_txdescs(struct list_head *freelist, int n) -{ - int i; - kra_tx_t *tx; - - LASSERT (freelist == &kranal_data.kra_idle_txs); - LASSERT (list_empty(freelist)); - - for (i = 0; i < n; i++) { - - LIBCFS_ALLOC(tx, sizeof(*tx)); - if (tx == NULL) { - 
CERROR("Can't allocate tx[%d]\n", i); - kranal_free_txdescs(freelist); - return -ENOMEM; - } - - LIBCFS_ALLOC(tx->tx_phys, - LNET_MAX_IOV * sizeof(*tx->tx_phys)); - if (tx->tx_phys == NULL) { - CERROR("Can't allocate tx[%d]->tx_phys\n", i); - - LIBCFS_FREE(tx, sizeof(*tx)); - kranal_free_txdescs(freelist); - return -ENOMEM; - } - - tx->tx_buftype = RANAL_BUF_NONE; - tx->tx_msg.ram_type = RANAL_MSG_NONE; - - list_add(&tx->tx_list, freelist); - } - - return 0; -} - -int -kranal_device_init(int id, kra_device_t *dev) -{ - int total_ntx = *kranal_tunables.kra_ntx; - RAP_RETURN rrc; - - dev->rad_id = id; - rrc = RapkGetDeviceByIndex(id, kranal_device_callback, - &dev->rad_handle); - if (rrc != RAP_SUCCESS) { - CERROR("Can't get Rapidarray Device %d: %d\n", id, rrc); - goto failed_0; - } - - rrc = RapkReserveRdma(dev->rad_handle, total_ntx); - if (rrc != RAP_SUCCESS) { - CERROR("Can't reserve %d RDMA descriptors" - " for device %d: %d\n", total_ntx, id, rrc); - goto failed_1; - } - - rrc = RapkCreateCQ(dev->rad_handle, total_ntx, RAP_CQTYPE_SEND, - &dev->rad_rdma_cqh); - if (rrc != RAP_SUCCESS) { - CERROR("Can't create rdma cq size %d for device %d: %d\n", - total_ntx, id, rrc); - goto failed_1; - } - - rrc = RapkCreateCQ(dev->rad_handle, - *kranal_tunables.kra_fma_cq_size, - RAP_CQTYPE_RECV, &dev->rad_fma_cqh); - if (rrc != RAP_SUCCESS) { - CERROR("Can't create fma cq size %d for device %d: %d\n", - *kranal_tunables.kra_fma_cq_size, id, rrc); - goto failed_2; - } - - return 0; - - failed_2: - RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh); - failed_1: - RapkReleaseDevice(dev->rad_handle); - failed_0: - return -ENODEV; -} - -void -kranal_device_fini(kra_device_t *dev) -{ - LASSERT (list_empty(&dev->rad_ready_conns)); - LASSERT (list_empty(&dev->rad_new_conns)); - LASSERT (dev->rad_nphysmap == 0); - LASSERT (dev->rad_nppphysmap == 0); - LASSERT (dev->rad_nvirtmap == 0); - LASSERT (dev->rad_nobvirtmap == 0); - - LASSERT(dev->rad_scheduler == NULL); - 
RapkDestroyCQ(dev->rad_handle, dev->rad_fma_cqh); - RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh); - RapkReleaseDevice(dev->rad_handle); -} - -void -kranal_shutdown (lnet_ni_t *ni) -{ - int i; - unsigned long flags; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - LASSERT (ni == kranal_data.kra_ni); - LASSERT (ni->ni_data == &kranal_data); - - switch (kranal_data.kra_init) { - default: - CERROR("Unexpected state %d\n", kranal_data.kra_init); - LBUG(); - - case RANAL_INIT_ALL: - /* Prevent new peers from being created */ - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - kranal_data.kra_nonewpeers = 1; - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - /* Remove all existing peers from the peer table */ - kranal_del_peer(LNET_NID_ANY); - - /* Wait for pending conn reqs to be handled */ - i = 2; - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - while (!list_empty(&kranal_data.kra_connd_acceptq)) { - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, - flags); - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */ - "waiting for conn reqs to clean up\n"); - cfs_pause(cfs_time_seconds(1)); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - } - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - /* Wait for all peers to be freed */ - i = 2; - while (atomic_read(&kranal_data.kra_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */ - "waiting for %d peers to close down\n", - atomic_read(&kranal_data.kra_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case RANAL_INIT_DATA: - break; - } - - /* Peer state all cleaned up BEFORE setting shutdown, so threads don't - * have to worry about shutdown races. NB connections may be created - * while there are still active connds, but these will be temporary - * since peer creation always fails after the listener has started to - * shut down. 
*/ - LASSERT (atomic_read(&kranal_data.kra_npeers) == 0); - - /* Flag threads to terminate */ - kranal_data.kra_shutdown = 1; - - for (i = 0; i < kranal_data.kra_ndevs; i++) { - kra_device_t *dev = &kranal_data.kra_devices[i]; - - spin_lock_irqsave(&dev->rad_lock, flags); - wake_up(&dev->rad_waitq); - spin_unlock_irqrestore(&dev->rad_lock, flags); - } - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - wake_up_all(&kranal_data.kra_reaper_waitq); - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); - - LASSERT (list_empty(&kranal_data.kra_connd_peers)); - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - wake_up_all(&kranal_data.kra_connd_waitq); - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - /* Wait for threads to exit */ - i = 2; - while (atomic_read(&kranal_data.kra_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d threads to terminate\n", - atomic_read(&kranal_data.kra_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - LASSERT (atomic_read(&kranal_data.kra_npeers) == 0); - if (kranal_data.kra_peers != NULL) { - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) - LASSERT (list_empty(&kranal_data.kra_peers[i])); - - LIBCFS_FREE(kranal_data.kra_peers, - sizeof (struct list_head) * - kranal_data.kra_peer_hash_size); - } - - LASSERT (atomic_read(&kranal_data.kra_nconns) == 0); - if (kranal_data.kra_conns != NULL) { - for (i = 0; i < kranal_data.kra_conn_hash_size; i++) - LASSERT (list_empty(&kranal_data.kra_conns[i])); - - LIBCFS_FREE(kranal_data.kra_conns, - sizeof (struct list_head) * - kranal_data.kra_conn_hash_size); - } - - for (i = 0; i < kranal_data.kra_ndevs; i++) - kranal_device_fini(&kranal_data.kra_devices[i]); - - kranal_free_txdescs(&kranal_data.kra_idle_txs); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - kranal_data.kra_init = RANAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kranal_startup 
(lnet_ni_t *ni) -{ - struct timeval tv; - int pkmem = atomic_read(&libcfs_kmemory); - int rc; - int i; - kra_device_t *dev; - - LASSERT (ni->ni_lnd == &the_kralnd); - - /* Only 1 instance supported */ - if (kranal_data.kra_init != RANAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (lnet_set_ip_niaddr(ni) != 0) { - CERROR ("Can't determine my NID\n"); - return -EPERM; - } - - if (*kranal_tunables.kra_credits > *kranal_tunables.kra_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kranal_tunables.kra_credits, - *kranal_tunables.kra_ntx); - return -EINVAL; - } - - memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */ - - ni->ni_maxtxcredits = *kranal_tunables.kra_credits; - ni->ni_peertxcredits = *kranal_tunables.kra_peercredits; - - ni->ni_data = &kranal_data; - kranal_data.kra_ni = ni; - - /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and - * a unique (for all time) connstamp so we can uniquely identify - * the sender. The connstamp is an incrementing counter - * initialised with seconds + microseconds at startup time. So we - * rely on NOT creating connections more frequently on average than - * 1MHz to ensure we don't use old connstamps when we reboot. 
*/ - do_gettimeofday(&tv); - kranal_data.kra_connstamp = - kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - rwlock_init(&kranal_data.kra_global_lock); - - for (i = 0; i < RANAL_MAXDEVS; i++ ) { - kra_device_t *dev = &kranal_data.kra_devices[i]; - - dev->rad_idx = i; - INIT_LIST_HEAD(&dev->rad_ready_conns); - INIT_LIST_HEAD(&dev->rad_new_conns); - init_waitqueue_head(&dev->rad_waitq); - spin_lock_init(&dev->rad_lock); - } - - kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT; - init_waitqueue_head(&kranal_data.kra_reaper_waitq); - spin_lock_init(&kranal_data.kra_reaper_lock); - - INIT_LIST_HEAD(&kranal_data.kra_connd_acceptq); - INIT_LIST_HEAD(&kranal_data.kra_connd_peers); - init_waitqueue_head(&kranal_data.kra_connd_waitq); - spin_lock_init(&kranal_data.kra_connd_lock); - - INIT_LIST_HEAD(&kranal_data.kra_idle_txs); - spin_lock_init(&kranal_data.kra_tx_lock); - - /* OK to call kranal_api_shutdown() to cleanup now */ - kranal_data.kra_init = RANAL_INIT_DATA; - PORTAL_MODULE_USE; - - kranal_data.kra_peer_hash_size = RANAL_PEER_HASH_SIZE; - LIBCFS_ALLOC(kranal_data.kra_peers, - sizeof(struct list_head) * kranal_data.kra_peer_hash_size); - if (kranal_data.kra_peers == NULL) - goto failed; - - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) - INIT_LIST_HEAD(&kranal_data.kra_peers[i]); - - kranal_data.kra_conn_hash_size = RANAL_PEER_HASH_SIZE; - LIBCFS_ALLOC(kranal_data.kra_conns, - sizeof(struct list_head) * kranal_data.kra_conn_hash_size); - if (kranal_data.kra_conns == NULL) - goto failed; - - for (i = 0; i < kranal_data.kra_conn_hash_size; i++) - INIT_LIST_HEAD(&kranal_data.kra_conns[i]); - - rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs, - *kranal_tunables.kra_ntx); - if (rc != 0) - goto failed; - - rc = kranal_thread_start(kranal_reaper, NULL); - if (rc != 0) { - CERROR("Can't spawn ranal reaper: %d\n", rc); - goto failed; - } - - for (i = 0; i < *kranal_tunables.kra_n_connd; i++) { - rc = 
kranal_thread_start(kranal_connd, (void *)(unsigned long)i); - if (rc != 0) { - CERROR("Can't spawn ranal connd[%d]: %d\n", - i, rc); - goto failed; - } - } - - LASSERT (kranal_data.kra_ndevs == 0); - - /* Use all available RapidArray devices */ - for (i = 0; i < RANAL_MAXDEVS; i++) { - dev = &kranal_data.kra_devices[kranal_data.kra_ndevs]; - - rc = kranal_device_init(kranal_devids[i], dev); - if (rc == 0) - kranal_data.kra_ndevs++; - } - - if (kranal_data.kra_ndevs == 0) { - CERROR("Can't initialise any RapidArray devices\n"); - goto failed; - } - - for (i = 0; i < kranal_data.kra_ndevs; i++) { - dev = &kranal_data.kra_devices[i]; - rc = kranal_thread_start(kranal_scheduler, dev); - if (rc != 0) { - CERROR("Can't spawn ranal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - /* flag everything initialised */ - kranal_data.kra_init = RANAL_INIT_ALL; - /*****************************************************/ - - CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem); - return 0; - - failed: - kranal_shutdown(ni); - return -ENETDOWN; -} - -void __exit -kranal_module_fini (void) -{ - lnet_unregister_lnd(&the_kralnd); - kranal_tunables_fini(); -} - -int __init -kranal_module_init (void) -{ - int rc; - - rc = kranal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kralnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel RapidArray LND v0.01"); -MODULE_LICENSE("GPL"); - -module_init(kranal_module_init); -module_exit(kranal_module_fini); diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h deleted file mode 100644 index e4281c38622c770886220c900a4c389f21a11212..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd.h +++ /dev/null @@ -1,456 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. 
- * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> - -#include <net/sock.h> -#include <linux/in.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -#include <rapl.h> - -/* tunables determined at compile time */ -#define RANAL_RESCHED 100 /* # scheduler loops before reschedule */ - -#define RANAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define RANAL_CONN_HASH_SIZE 101 /* # conn lists */ - -#define RANAL_MIN_TIMEOUT 5 /* minimum timeout interval (seconds) */ -#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2) /* timeout -> keepalive interval */ - -/* fixed constants */ -#define RANAL_MAXDEVS 2 /* max # devices RapidArray 
supports */ -#define RANAL_FMA_MAX_PREFIX 232 /* max bytes in FMA "Prefix" we can use */ -#define RANAL_FMA_MAX_DATA ((7<<10)-256) /* Max FMA MSG is 7K including prefix */ - - -typedef struct -{ - int *kra_n_connd; /* # connection daemons */ - int *kra_min_reconnect_interval; /* first failed connection retry... */ - int *kra_max_reconnect_interval; /* ...exponentially increasing to this */ - int *kra_ntx; /* # tx descs */ - int *kra_credits; /* # concurrent sends */ - int *kra_peercredits; /* # concurrent sends to 1 peer */ - int *kra_fma_cq_size; /* # entries in receive CQ */ - int *kra_timeout; /* comms timeout (seconds) */ - int *kra_max_immediate; /* immediate payload breakpoint */ - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *kra_sysctl; /* sysctl interface */ -#endif -} kra_tunables_t; - -typedef struct -{ - RAP_PVOID rad_handle; /* device handle */ - RAP_PVOID rad_fma_cqh; /* FMA completion queue handle */ - RAP_PVOID rad_rdma_cqh; /* rdma completion queue handle */ - int rad_id; /* device id */ - int rad_idx; /* index in kra_devices */ - int rad_ready; /* set by device callback */ - struct list_head rad_ready_conns;/* connections ready to tx/rx */ - struct list_head rad_new_conns; /* new connections to complete */ - wait_queue_head_t rad_waitq; /* scheduler waits here */ - spinlock_t rad_lock; /* serialise */ - void *rad_scheduler; /* scheduling thread */ - unsigned int rad_nphysmap; /* # phys mappings */ - unsigned int rad_nppphysmap; /* # phys pages mapped */ - unsigned int rad_nvirtmap; /* # virt mappings */ - unsigned long rad_nobvirtmap; /* # virt bytes mapped */ -} kra_device_t; - -typedef struct -{ - int kra_init; /* initialisation state */ - int kra_shutdown; /* shut down? 
*/ - atomic_t kra_nthreads; /* # live threads */ - lnet_ni_t *kra_ni; /* _the_ nal instance */ - - kra_device_t kra_devices[RANAL_MAXDEVS]; /* device/ptag/cq etc */ - int kra_ndevs; /* # devices */ - - rwlock_t kra_global_lock; /* stabilize peer/conn ops */ - - struct list_head *kra_peers; /* hash table of all my known peers */ - int kra_peer_hash_size; /* size of kra_peers */ - atomic_t kra_npeers; /* # peers extant */ - int kra_nonewpeers; /* prevent new peers */ - - struct list_head *kra_conns; /* conns hashed by cqid */ - int kra_conn_hash_size; /* size of kra_conns */ - __u64 kra_peerstamp; /* when I started up */ - __u64 kra_connstamp; /* conn stamp generator */ - int kra_next_cqid; /* cqid generator */ - atomic_t kra_nconns; /* # connections extant */ - - long kra_new_min_timeout; /* minimum timeout on any new conn */ - wait_queue_head_t kra_reaper_waitq; /* reaper sleeps here */ - spinlock_t kra_reaper_lock; /* serialise */ - - struct list_head kra_connd_peers; /* peers waiting for a connection */ - struct list_head kra_connd_acceptq; /* accepted sockets to handshake */ - wait_queue_head_t kra_connd_waitq; /* connection daemons sleep here */ - spinlock_t kra_connd_lock; /* serialise */ - - struct list_head kra_idle_txs; /* idle tx descriptors */ - __u64 kra_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kra_tx_lock; /* serialise */ -} kra_data_t; - -#define RANAL_INIT_NOTHING 0 -#define RANAL_INIT_DATA 1 -#define RANAL_INIT_ALL 2 - -typedef struct kra_acceptsock /* accepted socket queued for connd */ -{ - struct list_head ras_list; /* queue for attention */ - struct socket *ras_sock; /* the accepted socket */ -} kra_acceptsock_t; - -/************************************************************************ - * Wire message structs. These are sent in sender's byte order - * (i.e. receiver checks magic and flips if required). 
- */ - -typedef struct kra_connreq /* connection request/response */ -{ /* (sent via socket) */ - __u32 racr_magic; /* I'm an ranal connreq */ - __u16 racr_version; /* this is my version number */ - __u16 racr_devid; /* sender's device ID */ - __u64 racr_srcnid; /* sender's NID */ - __u64 racr_dstnid; /* who sender expects to listen */ - __u64 racr_peerstamp; /* sender's instance stamp */ - __u64 racr_connstamp; /* sender's connection stamp */ - __u32 racr_timeout; /* sender's timeout */ - RAP_RI_PARAMETERS racr_riparams; /* sender's endpoint info */ -} kra_connreq_t; - -typedef struct -{ - RAP_MEM_KEY rard_key; - RAP_PVOID64 rard_addr; - RAP_UINT32 rard_nob; -} kra_rdma_desc_t; - -typedef struct -{ - lnet_hdr_t raim_hdr; /* portals header */ - /* Portals payload is in FMA "Message Data" */ -} kra_immediate_msg_t; - -typedef struct -{ - lnet_hdr_t raprm_hdr; /* portals header */ - __u64 raprm_cookie; /* opaque completion cookie */ -} kra_putreq_msg_t; - -typedef struct -{ - __u64 rapam_src_cookie; /* reflected completion cookie */ - __u64 rapam_dst_cookie; /* opaque completion cookie */ - kra_rdma_desc_t rapam_desc; /* sender's sink buffer */ -} kra_putack_msg_t; - -typedef struct -{ - lnet_hdr_t ragm_hdr; /* portals header */ - __u64 ragm_cookie; /* opaque completion cookie */ - kra_rdma_desc_t ragm_desc; /* sender's sink buffer */ -} kra_get_msg_t; - -typedef struct -{ - __u64 racm_cookie; /* reflected completion cookie */ -} kra_completion_msg_t; - -typedef struct /* NB must fit in FMA "Prefix" */ -{ - __u32 ram_magic; /* I'm an ranal message */ - __u16 ram_version; /* this is my version number */ - __u16 ram_type; /* msg type */ - __u64 ram_srcnid; /* sender's NID */ - __u64 ram_connstamp; /* sender's connection stamp */ - union { - kra_immediate_msg_t immediate; - kra_putreq_msg_t putreq; - kra_putack_msg_t putack; - kra_get_msg_t get; - kra_completion_msg_t completion; - } ram_u; - __u32 ram_seq; /* incrementing sequence number */ -} kra_msg_t; - -#define 
RANAL_MSG_MAGIC LNET_PROTO_RA_MAGIC /* unique magic */ -#define RANAL_MSG_VERSION 1 /* current protocol version */ - -#define RANAL_MSG_FENCE 0x80 /* fence RDMA */ - -#define RANAL_MSG_NONE 0x00 /* illegal message */ -#define RANAL_MSG_NOOP 0x01 /* empty ram_u (keepalive) */ -#define RANAL_MSG_IMMEDIATE 0x02 /* ram_u.immediate */ -#define RANAL_MSG_PUT_REQ 0x03 /* ram_u.putreq (src->sink) */ -#define RANAL_MSG_PUT_NAK 0x04 /* ram_u.completion (no PUT match: sink->src) */ -#define RANAL_MSG_PUT_ACK 0x05 /* ram_u.putack (PUT matched: sink->src) */ -#define RANAL_MSG_PUT_DONE 0x86 /* ram_u.completion (src->sink) */ -#define RANAL_MSG_GET_REQ 0x07 /* ram_u.get (sink->src) */ -#define RANAL_MSG_GET_NAK 0x08 /* ram_u.completion (no GET match: src->sink) */ -#define RANAL_MSG_GET_DONE 0x89 /* ram_u.completion (src->sink) */ -#define RANAL_MSG_CLOSE 0x8a /* empty ram_u */ - -/***********************************************************************/ - -typedef struct kra_tx /* message descriptor */ -{ - struct list_head tx_list; /* queue on idle_txs/rac_sendq/rac_waitq */ - struct kra_conn *tx_conn; /* owning conn */ - lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */ - unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */ - int tx_nob; /* # bytes of payload */ - int tx_buftype; /* payload buffer type */ - void *tx_buffer; /* source/sink buffer */ - int tx_phys_offset; /* first page offset (if phys) */ - int tx_phys_npages; /* # physical pages */ - RAP_PHYS_REGION *tx_phys; /* page descriptors */ - RAP_MEM_KEY tx_map_key; /* mapping key */ - RAP_RDMA_DESCRIPTOR tx_rdma_desc; /* rdma descriptor */ - __u64 tx_cookie; /* identify this tx to peer */ - kra_msg_t tx_msg; /* FMA message buffer */ -} kra_tx_t; - -#define RANAL_BUF_NONE 0 /* buffer type not set */ -#define RANAL_BUF_IMMEDIATE 1 /* immediate data */ -#define RANAL_BUF_PHYS_UNMAPPED 2 /* physical: not mapped yet */ -#define RANAL_BUF_PHYS_MAPPED 3 /* physical: mapped already */ 
-#define RANAL_BUF_VIRT_UNMAPPED 4 /* virtual: not mapped yet */ -#define RANAL_BUF_VIRT_MAPPED 5 /* virtual: mapped already */ - -typedef struct kra_conn -{ - struct kra_peer *rac_peer; /* owning peer */ - struct list_head rac_list; /* stash on peer's conn list */ - struct list_head rac_hashlist; /* stash in connection hash table */ - struct list_head rac_schedlist; /* schedule (on rad_???_conns) for attention */ - struct list_head rac_fmaq; /* txs queued for FMA */ - struct list_head rac_rdmaq; /* txs awaiting RDMA completion */ - struct list_head rac_replyq; /* txs awaiting replies */ - __u64 rac_peerstamp; /* peer's unique stamp */ - __u64 rac_peer_connstamp; /* peer's unique connection stamp */ - __u64 rac_my_connstamp; /* my unique connection stamp */ - unsigned long rac_last_tx; /* when I last sent an FMA message (jiffies) */ - unsigned long rac_last_rx; /* when I last received an FMA messages (jiffies) */ - long rac_keepalive; /* keepalive interval (seconds) */ - long rac_timeout; /* infer peer death if no rx for this many seconds */ - __u32 rac_cqid; /* my completion callback id (non-unique) */ - __u32 rac_tx_seq; /* tx msg sequence number */ - __u32 rac_rx_seq; /* rx msg sequence number */ - atomic_t rac_refcount; /* # users */ - unsigned int rac_close_sent; /* I've sent CLOSE */ - unsigned int rac_close_recvd; /* I've received CLOSE */ - unsigned int rac_state; /* connection state */ - unsigned int rac_scheduled; /* being attented to */ - spinlock_t rac_lock; /* serialise */ - kra_device_t *rac_device; /* which device */ - RAP_PVOID rac_rihandle; /* RA endpoint */ - kra_msg_t *rac_rxmsg; /* incoming message (FMA prefix) */ - kra_msg_t rac_msg; /* keepalive/CLOSE message buffer */ -} kra_conn_t; - -#define RANAL_CONN_ESTABLISHED 0 -#define RANAL_CONN_CLOSING 1 -#define RANAL_CONN_CLOSED 2 - -typedef struct kra_peer -{ - struct list_head rap_list; /* stash on global peer list */ - struct list_head rap_connd_list; /* schedule on kra_connd_peers */ - struct 
list_head rap_conns; /* all active connections */ - struct list_head rap_tx_queue; /* msgs waiting for a conn */ - lnet_nid_t rap_nid; /* who's on the other end(s) */ - __u32 rap_ip; /* IP address of peer */ - int rap_port; /* port on which peer listens */ - atomic_t rap_refcount; /* # users */ - int rap_persistence; /* "known" peer refs */ - int rap_connecting; /* connection forming */ - unsigned long rap_reconnect_time; /* CURRENT_SECONDS when reconnect OK */ - unsigned long rap_reconnect_interval; /* exponential backoff */ -} kra_peer_t; - -extern kra_data_t kranal_data; -extern kra_tunables_t kranal_tunables; - -extern void kranal_destroy_peer(kra_peer_t *peer); -extern void kranal_destroy_conn(kra_conn_t *conn); - -static inline void -kranal_peer_addref(kra_peer_t *peer) -{ - CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid)); - LASSERT(atomic_read(&peer->rap_refcount) > 0); - atomic_inc(&peer->rap_refcount); -} - -static inline void -kranal_peer_decref(kra_peer_t *peer) -{ - CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid)); - LASSERT(atomic_read(&peer->rap_refcount) > 0); - if (atomic_dec_and_test(&peer->rap_refcount)) - kranal_destroy_peer(peer); -} - -static inline struct list_head * -kranal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kranal_data.kra_peer_hash_size; - - return (&kranal_data.kra_peers[hash]); -} - -static inline int -kranal_peer_active(kra_peer_t *peer) -{ - /* Am I in the peer hash table? 
*/ - return (!list_empty(&peer->rap_list)); -} - -static inline void -kranal_conn_addref(kra_conn_t *conn) -{ - CDEBUG(D_NET, "%p->%s\n", conn, - libcfs_nid2str(conn->rac_peer->rap_nid)); - LASSERT(atomic_read(&conn->rac_refcount) > 0); - atomic_inc(&conn->rac_refcount); -} - -static inline void -kranal_conn_decref(kra_conn_t *conn) -{ - CDEBUG(D_NET, "%p->%s\n", conn, - libcfs_nid2str(conn->rac_peer->rap_nid)); - LASSERT(atomic_read(&conn->rac_refcount) > 0); - if (atomic_dec_and_test(&conn->rac_refcount)) - kranal_destroy_conn(conn); -} - -static inline struct list_head * -kranal_cqid2connlist (__u32 cqid) -{ - unsigned int hash = cqid % kranal_data.kra_conn_hash_size; - - return (&kranal_data.kra_conns [hash]); -} - -static inline kra_conn_t * -kranal_cqid2conn_locked (__u32 cqid) -{ - struct list_head *conns = kranal_cqid2connlist(cqid); - struct list_head *tmp; - kra_conn_t *conn; - - list_for_each(tmp, conns) { - conn = list_entry(tmp, kra_conn_t, rac_hashlist); - - if (conn->rac_cqid == cqid) - return conn; - } - - return NULL; -} - -static inline int -kranal_tx_mapped (kra_tx_t *tx) -{ - return (tx->tx_buftype == RANAL_BUF_VIRT_MAPPED || - tx->tx_buftype == RANAL_BUF_PHYS_MAPPED); -} - -int kranal_startup (lnet_ni_t *ni); -void kranal_shutdown (lnet_ni_t *ni); -int kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kranal_eager_recv(lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kranal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int kranal_accept(lnet_ni_t *ni, struct socket *sock); - -extern void kranal_free_acceptsock (kra_acceptsock_t *ras); -extern int kranal_listener_procint (cfs_sysctl_table_t *table, - int write, struct file *filp, - void *buffer, size_t *lenp); -extern void 
kranal_update_reaper_timeout (long timeout); -extern void kranal_tx_done (kra_tx_t *tx, int completion); -extern void kranal_unlink_peer_locked (kra_peer_t *peer); -extern void kranal_schedule_conn (kra_conn_t *conn); -extern int kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid); -extern int kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port); -extern kra_peer_t *kranal_find_peer_locked (lnet_nid_t nid); -extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx); -extern int kranal_del_peer (lnet_nid_t nid); -extern void kranal_device_callback (RAP_INT32 devid, RAP_PVOID arg); -extern int kranal_thread_start (int(*fn)(void *arg), void *arg); -extern int kranal_connd (void *arg); -extern int kranal_reaper (void *arg); -extern int kranal_scheduler (void *arg); -extern void kranal_close_conn_locked (kra_conn_t *conn, int error); -extern void kranal_close_conn (kra_conn_t *conn, int error); -extern void kranal_terminate_conn_locked (kra_conn_t *conn); -extern void kranal_connect (kra_peer_t *peer); -extern int kranal_conn_handshake (struct socket *sock, kra_peer_t *peer); -extern int kranal_tunables_init(void); -extern void kranal_tunables_fini(void); -extern void kranal_init_msg(kra_msg_t *msg, int type); diff --git a/lnet/klnds/ralnd/ralnd_cb.c b/lnet/klnds/ralnd/ralnd_cb.c deleted file mode 100644 index 96296d53e53c2f95e1e44d4e6410bac6ffe22062..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd_cb.c +++ /dev/null @@ -1,2036 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "ralnd.h" - -void -kranal_device_callback(RAP_INT32 devid, RAP_PVOID arg) -{ - kra_device_t *dev; - int i; - unsigned long flags; - - CDEBUG(D_NET, "callback for device %d\n", devid); - - for (i = 0; i < kranal_data.kra_ndevs; i++) { - - dev = &kranal_data.kra_devices[i]; - if (dev->rad_id != devid) - continue; - - spin_lock_irqsave(&dev->rad_lock, flags); - - if (!dev->rad_ready) { - dev->rad_ready = 1; - wake_up(&dev->rad_waitq); - } - - spin_unlock_irqrestore(&dev->rad_lock, flags); - return; - } - - CWARN("callback for unknown device %d\n", devid); -} - -void -kranal_schedule_conn(kra_conn_t *conn) -{ - kra_device_t *dev = conn->rac_device; - unsigned long flags; - - spin_lock_irqsave(&dev->rad_lock, flags); - - if (!conn->rac_scheduled) { - kranal_conn_addref(conn); /* +1 ref for scheduler */ - conn->rac_scheduled = 1; - list_add_tail(&conn->rac_schedlist, &dev->rad_ready_conns); - wake_up(&dev->rad_waitq); - } - - spin_unlock_irqrestore(&dev->rad_lock, flags); -} - -kra_tx_t * -kranal_get_idle_tx (void) -{ - unsigned long flags; - kra_tx_t *tx; - - spin_lock_irqsave(&kranal_data.kra_tx_lock, flags); - - if (list_empty(&kranal_data.kra_idle_txs)) { - spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags); - return NULL; - } - - tx = list_entry(kranal_data.kra_idle_txs.next, kra_tx_t, tx_list); - list_del(&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, but we've - * got a lock right now... 
*/ - tx->tx_cookie = kranal_data.kra_next_tx_cookie++; - - spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags); - - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - LASSERT (tx->tx_msg.ram_type == RANAL_MSG_NONE); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -void -kranal_init_msg(kra_msg_t *msg, int type) -{ - msg->ram_magic = RANAL_MSG_MAGIC; - msg->ram_version = RANAL_MSG_VERSION; - msg->ram_type = type; - msg->ram_srcnid = kranal_data.kra_ni->ni_nid; - /* ram_connstamp gets set when FMA is sent */ -} - -kra_tx_t * -kranal_new_tx_msg (int type) -{ - kra_tx_t *tx = kranal_get_idle_tx(); - - if (tx != NULL) - kranal_init_msg(&tx->tx_msg, type); - - return tx; -} - -int -kranal_setup_immediate_buffer (kra_tx_t *tx, - unsigned int niov, struct iovec *iov, - int offset, int nob) - -{ - /* For now this is almost identical to kranal_setup_virt_buffer, but we - * could "flatten" the payload into a single contiguous buffer ready - * for sending direct over an FMA if we ever needed to. 
*/ - - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - LASSERT (nob >= 0); - - if (nob == 0) { - tx->tx_buffer = NULL; - } else { - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR("Can't handle multiple vaddr fragments\n"); - return -EMSGSIZE; - } - - tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset); - } - - tx->tx_buftype = RANAL_BUF_IMMEDIATE; - tx->tx_nob = nob; - return 0; -} - -int -kranal_setup_virt_buffer (kra_tx_t *tx, - unsigned int niov, struct iovec *iov, - int offset, int nob) - -{ - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR("Can't handle multiple vaddr fragments\n"); - return -EMSGSIZE; - } - - tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED; - tx->tx_nob = nob; - tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset); - return 0; -} - -int -kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, lnet_kiov_t *kiov, - int offset, int nob) -{ - RAP_PHYS_REGION *phys = tx->tx_phys; - int resid; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED; - tx->tx_nob = nob; - tx->tx_buffer = (void *)((unsigned long)(kiov->kiov_offset + offset)); - - phys->Address = lnet_page2phys(kiov->kiov_page); - phys++; - - resid = nob - (kiov->kiov_len - offset); - while (resid > 0) { - kiov++; - nkiov--; - LASSERT (nkiov > 0); - - if (kiov->kiov_offset != 0 || - ((resid > PAGE_SIZE) && - kiov->kiov_len < PAGE_SIZE)) { - /* Can't have gaps */ - CERROR("Can't make payload 
contiguous in I/O VM:" - "page %d, offset %d, len %d \n", - (int)(phys - tx->tx_phys), - kiov->kiov_offset, kiov->kiov_len); - return -EINVAL; - } - - if ((phys - tx->tx_phys) == LNET_MAX_IOV) { - CERROR ("payload too big (%d)\n", (int)(phys - tx->tx_phys)); - return -EMSGSIZE; - } - - phys->Address = lnet_page2phys(kiov->kiov_page); - phys++; - - resid -= PAGE_SIZE; - } - - tx->tx_phys_npages = phys - tx->tx_phys; - return 0; -} - -static inline int -kranal_setup_rdma_buffer (kra_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - int offset, int nob) -{ - LASSERT ((iov == NULL) != (kiov == NULL)); - - if (kiov != NULL) - return kranal_setup_phys_buffer(tx, niov, kiov, offset, nob); - - return kranal_setup_virt_buffer(tx, niov, iov, offset, nob); -} - -int -kranal_map_buffer (kra_tx_t *tx) -{ - kra_conn_t *conn = tx->tx_conn; - kra_device_t *dev = conn->rac_device; - RAP_RETURN rrc; - - LASSERT (current == dev->rad_scheduler); - - switch (tx->tx_buftype) { - default: - LBUG(); - - case RANAL_BUF_NONE: - case RANAL_BUF_IMMEDIATE: - case RANAL_BUF_PHYS_MAPPED: - case RANAL_BUF_VIRT_MAPPED: - return 0; - - case RANAL_BUF_PHYS_UNMAPPED: - rrc = RapkRegisterPhys(dev->rad_handle, - tx->tx_phys, tx->tx_phys_npages, - &tx->tx_map_key); - if (rrc != RAP_SUCCESS) { - CERROR ("Can't map %d pages: dev %d " - "phys %u pp %u, virt %u nob %lu\n", - tx->tx_phys_npages, dev->rad_id, - dev->rad_nphysmap, dev->rad_nppphysmap, - dev->rad_nvirtmap, dev->rad_nobvirtmap); - return -ENOMEM; /* assume insufficient resources */ - } - - dev->rad_nphysmap++; - dev->rad_nppphysmap += tx->tx_phys_npages; - - tx->tx_buftype = RANAL_BUF_PHYS_MAPPED; - return 0; - - case RANAL_BUF_VIRT_UNMAPPED: - rrc = RapkRegisterMemory(dev->rad_handle, - tx->tx_buffer, tx->tx_nob, - &tx->tx_map_key); - if (rrc != RAP_SUCCESS) { - CERROR ("Can't map %d bytes: dev %d " - "phys %u pp %u, virt %u nob %lu\n", - tx->tx_nob, dev->rad_id, - dev->rad_nphysmap, dev->rad_nppphysmap, - 
dev->rad_nvirtmap, dev->rad_nobvirtmap); - return -ENOMEM; /* assume insufficient resources */ - } - - dev->rad_nvirtmap++; - dev->rad_nobvirtmap += tx->tx_nob; - - tx->tx_buftype = RANAL_BUF_VIRT_MAPPED; - return 0; - } -} - -void -kranal_unmap_buffer (kra_tx_t *tx) -{ - kra_device_t *dev; - RAP_RETURN rrc; - - switch (tx->tx_buftype) { - default: - LBUG(); - - case RANAL_BUF_NONE: - case RANAL_BUF_IMMEDIATE: - case RANAL_BUF_PHYS_UNMAPPED: - case RANAL_BUF_VIRT_UNMAPPED: - break; - - case RANAL_BUF_PHYS_MAPPED: - LASSERT (tx->tx_conn != NULL); - dev = tx->tx_conn->rac_device; - LASSERT (current == dev->rad_scheduler); - rrc = RapkDeregisterMemory(dev->rad_handle, NULL, - &tx->tx_map_key); - LASSERT (rrc == RAP_SUCCESS); - - dev->rad_nphysmap--; - dev->rad_nppphysmap -= tx->tx_phys_npages; - - tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED; - break; - - case RANAL_BUF_VIRT_MAPPED: - LASSERT (tx->tx_conn != NULL); - dev = tx->tx_conn->rac_device; - LASSERT (current == dev->rad_scheduler); - rrc = RapkDeregisterMemory(dev->rad_handle, tx->tx_buffer, - &tx->tx_map_key); - LASSERT (rrc == RAP_SUCCESS); - - dev->rad_nvirtmap--; - dev->rad_nobvirtmap -= tx->tx_nob; - - tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED; - break; - } -} - -void -kranal_tx_done (kra_tx_t *tx, int completion) -{ - lnet_msg_t *lnetmsg[2]; - unsigned long flags; - int i; - - LASSERT (!in_interrupt()); - - kranal_unmap_buffer(tx); - - lnetmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lnetmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - - tx->tx_buftype = RANAL_BUF_NONE; - tx->tx_msg.ram_type = RANAL_MSG_NONE; - tx->tx_conn = NULL; - - spin_lock_irqsave(&kranal_data.kra_tx_lock, flags); - - list_add_tail(&tx->tx_list, &kranal_data.kra_idle_txs); - - spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags); - - /* finalize AFTER freeing lnet msgs */ - for (i = 0; i < 2; i++) { - if (lnetmsg[i] == NULL) - continue; - - lnet_finalize(kranal_data.kra_ni, lnetmsg[i], completion); - } -} - -kra_conn_t * 
-kranal_find_conn_locked (kra_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->rap_conns) { - return list_entry(tmp, kra_conn_t, rac_list); - } - - return NULL; -} - -void -kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx) -{ - unsigned long flags; - - tx->tx_conn = conn; - - spin_lock_irqsave(&conn->rac_lock, flags); - list_add_tail(&tx->tx_list, &conn->rac_fmaq); - tx->tx_qtime = jiffies; - spin_unlock_irqrestore(&conn->rac_lock, flags); - - kranal_schedule_conn(conn); -} - -void -kranal_launch_tx (kra_tx_t *tx, lnet_nid_t nid) -{ - unsigned long flags; - kra_peer_t *peer; - kra_conn_t *conn; - int rc; - int retry; - rwlock_t *g_lock = &kranal_data.kra_global_lock; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - - for (retry = 0; ; retry = 1) { - - read_lock(g_lock); - - peer = kranal_find_peer_locked(nid); - if (peer != NULL) { - conn = kranal_find_conn_locked(peer); - if (conn != NULL) { - kranal_post_fma(conn, tx); - read_unlock(g_lock); - return; - } - } - - /* Making connections; I'll need a write lock... 
*/ - read_unlock(g_lock); - write_lock_irqsave(g_lock, flags); - - peer = kranal_find_peer_locked(nid); - if (peer != NULL) - break; - - write_unlock_irqrestore(g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - kranal_tx_done(tx, -EHOSTUNREACH); - return; - } - - rc = kranal_add_persistent_peer(nid, LNET_NIDADDR(nid), - lnet_acceptor_port()); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - kranal_tx_done(tx, rc); - return; - } - } - - conn = kranal_find_conn_locked(peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kranal_post_fma(conn, tx); - write_unlock_irqrestore(g_lock, flags); - return; - } - - LASSERT (peer->rap_persistence > 0); - - if (!peer->rap_connecting) { - LASSERT (list_empty(&peer->rap_tx_queue)); - - if (!(peer->rap_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->rap_reconnect_time))) { - write_unlock_irqrestore(g_lock, flags); - kranal_tx_done(tx, -EHOSTUNREACH); - return; - } - - peer->rap_connecting = 1; - kranal_peer_addref(peer); /* extra ref for connd */ - - spin_lock(&kranal_data.kra_connd_lock); - - list_add_tail(&peer->rap_connd_list, - &kranal_data.kra_connd_peers); - wake_up(&kranal_data.kra_connd_waitq); - - spin_unlock(&kranal_data.kra_connd_lock); - } - - /* A connection is being established; queue the message... */ - list_add_tail(&tx->tx_list, &peer->rap_tx_queue); - - write_unlock_irqrestore(g_lock, flags); -} - -void -kranal_rdma(kra_tx_t *tx, int type, - kra_rdma_desc_t *sink, int nob, __u64 cookie) -{ - kra_conn_t *conn = tx->tx_conn; - RAP_RETURN rrc; - unsigned long flags; - - LASSERT (kranal_tx_mapped(tx)); - LASSERT (nob <= sink->rard_nob); - LASSERT (nob <= tx->tx_nob); - - /* No actual race with scheduler sending CLOSE (I'm she!) 
*/ - LASSERT (current == conn->rac_device->rad_scheduler); - - memset(&tx->tx_rdma_desc, 0, sizeof(tx->tx_rdma_desc)); - tx->tx_rdma_desc.SrcPtr.AddressBits = (__u64)((unsigned long)tx->tx_buffer); - tx->tx_rdma_desc.SrcKey = tx->tx_map_key; - tx->tx_rdma_desc.DstPtr = sink->rard_addr; - tx->tx_rdma_desc.DstKey = sink->rard_key; - tx->tx_rdma_desc.Length = nob; - tx->tx_rdma_desc.AppPtr = tx; - - /* prep final completion message */ - kranal_init_msg(&tx->tx_msg, type); - tx->tx_msg.ram_u.completion.racm_cookie = cookie; - - if (nob == 0) { /* Immediate completion */ - kranal_post_fma(conn, tx); - return; - } - - LASSERT (!conn->rac_close_sent); /* Don't lie (CLOSE == RDMA idle) */ - - rrc = RapkPostRdma(conn->rac_rihandle, &tx->tx_rdma_desc); - LASSERT (rrc == RAP_SUCCESS); - - spin_lock_irqsave(&conn->rac_lock, flags); - list_add_tail(&tx->tx_list, &conn->rac_rdmaq); - tx->tx_qtime = jiffies; - spin_unlock_irqrestore(&conn->rac_lock, flags); -} - -int -kranal_consume_rxmsg (kra_conn_t *conn, void *buffer, int nob) -{ - __u32 nob_received = nob; - RAP_RETURN rrc; - - LASSERT (conn->rac_rxmsg != NULL); - CDEBUG(D_NET, "Consuming %p\n", conn); - - rrc = RapkFmaCopyOut(conn->rac_rihandle, buffer, - &nob_received, sizeof(kra_msg_t)); - LASSERT (rrc == RAP_SUCCESS); - - conn->rac_rxmsg = NULL; - - if (nob_received < nob) { - CWARN("Incomplete immediate msg from %s: expected %d, got %d\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - nob, nob_received); - return -EPROTO; - } - - return 0; -} - -int -kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - 
kra_tx_t *tx; - int rc; - - /* NB 'private' is different depending on what we're sending.... */ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - nob, niov, libcfs_id2str(target)); - - LASSERT (nob == 0 || niov > 0); - LASSERT (niov <= LNET_MAX_IOV); - - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(kiov != NULL && iov != NULL)); - - if (routing) { - CERROR ("Can't route\n"); - return -EIO; - } - - switch(type) { - default: - LBUG(); - - case LNET_MSG_ACK: - LASSERT (nob == 0); - break; - - case LNET_MSG_GET: - LASSERT (niov == 0); - LASSERT (nob == 0); - /* We have to consider the eventual sink buffer rather than any - * payload passed here (there isn't any, and strictly, looking - * inside lntmsg is a layering violation). We send a simple - * IMMEDIATE GET if the sink buffer is mapped already and small - * enough for FMA */ - - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0 && - lntmsg->msg_md->md_length <= RANAL_FMA_MAX_DATA && - lntmsg->msg_md->md_length <= *kranal_tunables.kra_max_immediate) - break; /* send IMMEDIATE */ - - tx = kranal_new_tx_msg(RANAL_MSG_GET_REQ); - if (tx == NULL) - return -ENOMEM; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kranal_setup_virt_buffer(tx, lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kranal_setup_phys_buffer(tx, lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET to %s\n", - libcfs_nid2str(target.nid)); - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; - tx->tx_msg.ram_u.get.ragm_hdr = *hdr; - /* rest of tx_msg is setup just before it is sent */ - kranal_launch_tx(tx, 
target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - if (kiov == NULL && /* not paged */ - nob <= RANAL_FMA_MAX_DATA && /* small enough */ - nob <= *kranal_tunables.kra_max_immediate) - break; /* send IMMEDIATE */ - - tx = kranal_new_tx_msg(RANAL_MSG_PUT_REQ); - if (tx == NULL) - return -ENOMEM; - - rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob); - if (rc != 0) { - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; - tx->tx_msg.ram_u.putreq.raprm_hdr = *hdr; - /* rest of tx_msg is setup just before it is sent */ - kranal_launch_tx(tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (kiov == NULL); - LASSERT (nob <= RANAL_FMA_MAX_DATA); - - tx = kranal_new_tx_msg(RANAL_MSG_IMMEDIATE); - if (tx == NULL) - return -ENOMEM; - - rc = kranal_setup_immediate_buffer(tx, niov, iov, offset, nob); - if (rc != 0) { - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_msg.ram_u.immediate.raim_hdr = *hdr; - tx->tx_lntmsg[0] = lntmsg; - kranal_launch_tx(tx, target.nid); - return 0; -} - -void -kranal_reply(lnet_ni_t *ni, kra_conn_t *conn, lnet_msg_t *lntmsg) -{ - kra_msg_t *rxmsg = conn->rac_rxmsg; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kra_tx_t *tx; - int rc; - - tx = kranal_get_idle_tx(); - if (tx == NULL) - goto failed_0; - - rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob); - if (rc != 0) - goto failed_1; - - tx->tx_conn = conn; - - rc = kranal_map_buffer(tx); - if (rc != 0) - goto failed_1; - - tx->tx_lntmsg[0] = lntmsg; - - kranal_rdma(tx, RANAL_MSG_GET_DONE, - &rxmsg->ram_u.get.ragm_desc, nob, - rxmsg->ram_u.get.ragm_cookie); - return; - - failed_1: - kranal_tx_done(tx, -EIO); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kranal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void 
**new_private) -{ - kra_conn_t *conn = (kra_conn_t *)private; - - LCONSOLE_ERROR_MSG(0x12b, "Dropping message from %s: no buffers free.\n", - libcfs_nid2str(conn->rac_peer->rap_nid)); - - return -EDEADLK; -} - -int -kranal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kra_conn_t *conn = private; - kra_msg_t *rxmsg = conn->rac_rxmsg; - kra_tx_t *tx; - void *buffer; - int rc; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - CDEBUG(D_NET, "conn %p, rxmsg %p, lntmsg %p\n", conn, rxmsg, lntmsg); - - switch(rxmsg->ram_type) { - default: - LBUG(); - - case RANAL_MSG_IMMEDIATE: - if (mlen == 0) { - buffer = NULL; - } else if (kiov != NULL) { - CERROR("Can't recv immediate into paged buffer\n"); - return -EIO; - } else { - LASSERT (niov > 0); - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - if (mlen > iov->iov_len - offset) { - CERROR("Can't handle immediate frags\n"); - return -EIO; - } - buffer = ((char *)iov->iov_base) + offset; - } - rc = kranal_consume_rxmsg(conn, buffer, mlen); - lnet_finalize(ni, lntmsg, (rc == 0) ? 
0 : -EIO); - return 0; - - case RANAL_MSG_PUT_REQ: - tx = kranal_new_tx_msg(RANAL_MSG_PUT_ACK); - if (tx == NULL) { - kranal_consume_rxmsg(conn, NULL, 0); - return -ENOMEM; - } - - rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, mlen); - if (rc != 0) { - kranal_tx_done(tx, rc); - kranal_consume_rxmsg(conn, NULL, 0); - return -EIO; - } - - tx->tx_conn = conn; - rc = kranal_map_buffer(tx); - if (rc != 0) { - kranal_tx_done(tx, rc); - kranal_consume_rxmsg(conn, NULL, 0); - return -EIO; - } - - tx->tx_msg.ram_u.putack.rapam_src_cookie = - conn->rac_rxmsg->ram_u.putreq.raprm_cookie; - tx->tx_msg.ram_u.putack.rapam_dst_cookie = tx->tx_cookie; - tx->tx_msg.ram_u.putack.rapam_desc.rard_key = tx->tx_map_key; - tx->tx_msg.ram_u.putack.rapam_desc.rard_addr.AddressBits = - (__u64)((unsigned long)tx->tx_buffer); - tx->tx_msg.ram_u.putack.rapam_desc.rard_nob = mlen; - - tx->tx_lntmsg[0] = lntmsg; /* finalize this on RDMA_DONE */ - - kranal_post_fma(conn, tx); - kranal_consume_rxmsg(conn, NULL, 0); - return 0; - - case RANAL_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Matched! 
*/ - kranal_reply(ni, conn, lntmsg); - } else { - /* No match */ - tx = kranal_new_tx_msg(RANAL_MSG_GET_NAK); - if (tx != NULL) { - tx->tx_msg.ram_u.completion.racm_cookie = - rxmsg->ram_u.get.ragm_cookie; - kranal_post_fma(conn, tx); - } - } - kranal_consume_rxmsg(conn, NULL, 0); - return 0; - } -} - -int -kranal_thread_start (int(*fn)(void *arg), void *arg) -{ - long pid = kernel_thread(fn, arg, 0); - - if (pid < 0) - return(int)pid; - - atomic_inc(&kranal_data.kra_nthreads); - return 0; -} - -void -kranal_thread_fini (void) -{ - atomic_dec(&kranal_data.kra_nthreads); -} - -int -kranal_check_conn_timeouts (kra_conn_t *conn) -{ - kra_tx_t *tx; - struct list_head *ttmp; - unsigned long flags; - long timeout; - unsigned long now = jiffies; - - LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED || - conn->rac_state == RANAL_CONN_CLOSING); - - if (!conn->rac_close_sent && - time_after_eq(now, conn->rac_last_tx + conn->rac_keepalive * HZ)) { - /* not sent in a while; schedule conn so scheduler sends a keepalive */ - CDEBUG(D_NET, "Scheduling keepalive %p->%s\n", - conn, libcfs_nid2str(conn->rac_peer->rap_nid)); - kranal_schedule_conn(conn); - } - - timeout = conn->rac_timeout * HZ; - - if (!conn->rac_close_recvd && - time_after_eq(now, conn->rac_last_rx + timeout)) { - CERROR("%s received from %s within %lu seconds\n", - (conn->rac_state == RANAL_CONN_ESTABLISHED) ? - "Nothing" : "CLOSE not", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - conn->rac_last_rx)/HZ); - return -ETIMEDOUT; - } - - if (conn->rac_state != RANAL_CONN_ESTABLISHED) - return 0; - - /* Check the conn's queues are moving. These are "belt+braces" checks, - * in case of hardware/software errors that make this conn seem - * responsive even though it isn't progressing its message queues. 
*/ - - spin_lock_irqsave(&conn->rac_lock, flags); - - list_for_each (ttmp, &conn->rac_fmaq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - if (time_after_eq(now, tx->tx_qtime + timeout)) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CERROR("tx on fmaq for %s blocked %lu seconds\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - tx->tx_qtime)/HZ); - return -ETIMEDOUT; - } - } - - list_for_each (ttmp, &conn->rac_rdmaq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - if (time_after_eq(now, tx->tx_qtime + timeout)) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CERROR("tx on rdmaq for %s blocked %lu seconds\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - tx->tx_qtime)/HZ); - return -ETIMEDOUT; - } - } - - list_for_each (ttmp, &conn->rac_replyq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - if (time_after_eq(now, tx->tx_qtime + timeout)) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CERROR("tx on replyq for %s blocked %lu seconds\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - tx->tx_qtime)/HZ); - return -ETIMEDOUT; - } - } - - spin_unlock_irqrestore(&conn->rac_lock, flags); - return 0; -} - -void -kranal_reaper_check (int idx, unsigned long *min_timeoutp) -{ - struct list_head *conns = &kranal_data.kra_conns[idx]; - struct list_head *ctmp; - kra_conn_t *conn; - unsigned long flags; - int rc; - - again: - /* NB. We expect to check all the conns and not find any problems, so - * we just use a shared lock while we take a look... 
*/ - read_lock(&kranal_data.kra_global_lock); - - list_for_each (ctmp, conns) { - conn = list_entry(ctmp, kra_conn_t, rac_hashlist); - - if (conn->rac_timeout < *min_timeoutp ) - *min_timeoutp = conn->rac_timeout; - if (conn->rac_keepalive < *min_timeoutp ) - *min_timeoutp = conn->rac_keepalive; - - rc = kranal_check_conn_timeouts(conn); - if (rc == 0) - continue; - - kranal_conn_addref(conn); - read_unlock(&kranal_data.kra_global_lock); - - CERROR("Conn to %s, cqid %d timed out\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - conn->rac_cqid); - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - switch (conn->rac_state) { - default: - LBUG(); - - case RANAL_CONN_ESTABLISHED: - kranal_close_conn_locked(conn, -ETIMEDOUT); - break; - - case RANAL_CONN_CLOSING: - kranal_terminate_conn_locked(conn); - break; - } - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - kranal_conn_decref(conn); - - /* start again now I've dropped the lock */ - goto again; - } - - read_unlock(&kranal_data.kra_global_lock); -} - -int -kranal_connd (void *arg) -{ - long id = (long)arg; - char name[16]; - wait_queue_t wait; - unsigned long flags; - kra_peer_t *peer; - kra_acceptsock_t *ras; - int did_something; - - snprintf(name, sizeof(name), "kranal_connd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - - while (!kranal_data.kra_shutdown) { - did_something = 0; - - if (!list_empty(&kranal_data.kra_connd_acceptq)) { - ras = list_entry(kranal_data.kra_connd_acceptq.next, - kra_acceptsock_t, ras_list); - list_del(&ras->ras_list); - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - CDEBUG(D_NET,"About to handshake someone\n"); - - kranal_conn_handshake(ras->ras_sock, NULL); - kranal_free_acceptsock(ras); - - CDEBUG(D_NET,"Finished handshaking someone\n"); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - did_something = 1; - } - - 
if (!list_empty(&kranal_data.kra_connd_peers)) { - peer = list_entry(kranal_data.kra_connd_peers.next, - kra_peer_t, rap_connd_list); - - list_del_init(&peer->rap_connd_list); - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - kranal_connect(peer); - kranal_peer_decref(peer); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - did_something = 1; - } - - if (did_something) - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kranal_data.kra_connd_waitq, &wait); - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - schedule (); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&kranal_data.kra_connd_waitq, &wait); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - } - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - kranal_thread_fini(); - return 0; -} - -void -kranal_update_reaper_timeout(long timeout) -{ - unsigned long flags; - - LASSERT (timeout > 0); - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - if (timeout < kranal_data.kra_new_min_timeout) - kranal_data.kra_new_min_timeout = timeout; - - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); -} - -int -kranal_reaper (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - long timeout; - int i; - int conn_entries = kranal_data.kra_conn_hash_size; - int conn_index = 0; - int base_index = conn_entries - 1; - unsigned long next_check_time = jiffies; - long next_min_timeout = MAX_SCHEDULE_TIMEOUT; - long current_min_timeout = 1; - - cfs_daemonize("kranal_reaper"); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - while (!kranal_data.kra_shutdown) { - /* I wake up every 'p' seconds to check for timeouts on some - * more peers. 
I try to check every connection 'n' times - * within the global minimum of all keepalive and timeout - * intervals, to ensure I attend to every connection within - * (n+1)/n times its timeout intervals. */ - const int p = 1; - const int n = 3; - unsigned long min_timeout; - int chunk; - - /* careful with the jiffy wrap... */ - timeout = (long)(next_check_time - jiffies); - if (timeout > 0) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kranal_data.kra_reaper_waitq, &wait); - - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); - - schedule_timeout(timeout); - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&kranal_data.kra_reaper_waitq, &wait); - continue; - } - - if (kranal_data.kra_new_min_timeout != MAX_SCHEDULE_TIMEOUT) { - /* new min timeout set: restart min timeout scan */ - next_min_timeout = MAX_SCHEDULE_TIMEOUT; - base_index = conn_index - 1; - if (base_index < 0) - base_index = conn_entries - 1; - - if (kranal_data.kra_new_min_timeout < current_min_timeout) { - current_min_timeout = kranal_data.kra_new_min_timeout; - CDEBUG(D_NET, "Set new min timeout %ld\n", - current_min_timeout); - } - - kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT; - } - min_timeout = current_min_timeout; - - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); - - LASSERT (min_timeout > 0); - - /* Compute how many table entries to check now so I get round - * the whole table fast enough given that I do this at fixed - * intervals of 'p' seconds) */ - chunk = conn_entries; - if (min_timeout > n * p) - chunk = (chunk * n * p) / min_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kranal_reaper_check(conn_index, - &next_min_timeout); - conn_index = (conn_index + 1) % conn_entries; - } - - next_check_time += p * HZ; - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - if (((conn_index - chunk <= base_index && - base_index < conn_index) || - 
(conn_index - conn_entries - chunk <= base_index && - base_index < conn_index - conn_entries))) { - - /* Scanned all conns: set current_min_timeout... */ - if (current_min_timeout != next_min_timeout) { - current_min_timeout = next_min_timeout; - CDEBUG(D_NET, "Set new min timeout %ld\n", - current_min_timeout); - } - - /* ...and restart min timeout scan */ - next_min_timeout = MAX_SCHEDULE_TIMEOUT; - base_index = conn_index - 1; - if (base_index < 0) - base_index = conn_entries - 1; - } - } - - kranal_thread_fini(); - return 0; -} - -void -kranal_check_rdma_cq (kra_device_t *dev) -{ - kra_conn_t *conn; - kra_tx_t *tx; - RAP_RETURN rrc; - unsigned long flags; - RAP_RDMA_DESCRIPTOR *desc; - __u32 cqid; - __u32 event_type; - - for (;;) { - rrc = RapkCQDone(dev->rad_rdma_cqh, &cqid, &event_type); - if (rrc == RAP_NOT_DONE) { - CDEBUG(D_NET, "RDMA CQ %d empty\n", dev->rad_id); - return; - } - - LASSERT (rrc == RAP_SUCCESS); - LASSERT ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0); - - read_lock(&kranal_data.kra_global_lock); - - conn = kranal_cqid2conn_locked(cqid); - if (conn == NULL) { - /* Conn was destroyed? 
*/ - CDEBUG(D_NET, "RDMA CQID lookup %d failed\n", cqid); - read_unlock(&kranal_data.kra_global_lock); - continue; - } - - rrc = RapkRdmaDone(conn->rac_rihandle, &desc); - LASSERT (rrc == RAP_SUCCESS); - - CDEBUG(D_NET, "Completed %p\n", - list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list)); - - spin_lock_irqsave(&conn->rac_lock, flags); - - LASSERT (!list_empty(&conn->rac_rdmaq)); - tx = list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list); - list_del(&tx->tx_list); - - LASSERT(desc->AppPtr == (void *)tx); - LASSERT(tx->tx_msg.ram_type == RANAL_MSG_PUT_DONE || - tx->tx_msg.ram_type == RANAL_MSG_GET_DONE); - - list_add_tail(&tx->tx_list, &conn->rac_fmaq); - tx->tx_qtime = jiffies; - - spin_unlock_irqrestore(&conn->rac_lock, flags); - - /* Get conn's fmaq processed, now I've just put something - * there */ - kranal_schedule_conn(conn); - - read_unlock(&kranal_data.kra_global_lock); - } -} - -void -kranal_check_fma_cq (kra_device_t *dev) -{ - kra_conn_t *conn; - RAP_RETURN rrc; - __u32 cqid; - __u32 event_type; - struct list_head *conns; - struct list_head *tmp; - int i; - - for (;;) { - rrc = RapkCQDone(dev->rad_fma_cqh, &cqid, &event_type); - if (rrc == RAP_NOT_DONE) { - CDEBUG(D_NET, "FMA CQ %d empty\n", dev->rad_id); - return; - } - - LASSERT (rrc == RAP_SUCCESS); - - if ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0) { - - read_lock(&kranal_data.kra_global_lock); - - conn = kranal_cqid2conn_locked(cqid); - if (conn == NULL) { - CDEBUG(D_NET, "FMA CQID lookup %d failed\n", - cqid); - } else { - CDEBUG(D_NET, "FMA completed: %p CQID %d\n", - conn, cqid); - kranal_schedule_conn(conn); - } - - read_unlock(&kranal_data.kra_global_lock); - continue; - } - - /* FMA CQ has overflowed: check ALL conns */ - CWARN("FMA CQ overflow: scheduling ALL conns on device %d\n", - dev->rad_id); - - for (i = 0; i < kranal_data.kra_conn_hash_size; i++) { - - read_lock(&kranal_data.kra_global_lock); - - conns = &kranal_data.kra_conns[i]; - - list_for_each (tmp, conns) { - conn = 
list_entry(tmp, kra_conn_t, - rac_hashlist); - - if (conn->rac_device == dev) - kranal_schedule_conn(conn); - } - - /* don't block write lockers for too long... */ - read_unlock(&kranal_data.kra_global_lock); - } - } -} - -int -kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg, - void *immediate, int immediatenob) -{ - int sync = (msg->ram_type & RANAL_MSG_FENCE) != 0; - RAP_RETURN rrc; - - CDEBUG(D_NET,"%p sending msg %p %02x%s [%p for %d]\n", - conn, msg, msg->ram_type, sync ? "(sync)" : "", - immediate, immediatenob); - - LASSERT (sizeof(*msg) <= RANAL_FMA_MAX_PREFIX); - LASSERT ((msg->ram_type == RANAL_MSG_IMMEDIATE) ? - immediatenob <= RANAL_FMA_MAX_DATA : - immediatenob == 0); - - msg->ram_connstamp = conn->rac_my_connstamp; - msg->ram_seq = conn->rac_tx_seq; - - if (sync) - rrc = RapkFmaSyncSend(conn->rac_rihandle, - immediate, immediatenob, - msg, sizeof(*msg)); - else - rrc = RapkFmaSend(conn->rac_rihandle, - immediate, immediatenob, - msg, sizeof(*msg)); - - switch (rrc) { - default: - LBUG(); - - case RAP_SUCCESS: - conn->rac_last_tx = jiffies; - conn->rac_tx_seq++; - return 0; - - case RAP_NOT_DONE: - if (time_after_eq(jiffies, - conn->rac_last_tx + conn->rac_keepalive*HZ)) - CWARN("EAGAIN sending %02x (idle %lu secs)\n", - msg->ram_type, (jiffies - conn->rac_last_tx)/HZ); - return -EAGAIN; - } -} - -void -kranal_process_fmaq (kra_conn_t *conn) -{ - unsigned long flags; - int more_to_do; - kra_tx_t *tx; - int rc; - int expect_reply; - - /* NB 1. kranal_sendmsg() may fail if I'm out of credits right now. - * However I will be rescheduled by an FMA completion event - * when I eventually get some. - * NB 2. Sampling rac_state here races with setting it elsewhere. - * But it doesn't matter if I try to send a "real" message just - * as I start closing because I'll get scheduled to send the - * close anyway. */ - - /* Not racing with incoming message processing! 
*/ - LASSERT (current == conn->rac_device->rad_scheduler); - - if (conn->rac_state != RANAL_CONN_ESTABLISHED) { - if (!list_empty(&conn->rac_rdmaq)) { - /* RDMAs in progress */ - LASSERT (!conn->rac_close_sent); - - if (time_after_eq(jiffies, - conn->rac_last_tx + - conn->rac_keepalive * HZ)) { - CDEBUG(D_NET, "sending NOOP (rdma in progress)\n"); - kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP); - kranal_sendmsg(conn, &conn->rac_msg, NULL, 0); - } - return; - } - - if (conn->rac_close_sent) - return; - - CWARN("sending CLOSE to %s\n", - libcfs_nid2str(conn->rac_peer->rap_nid)); - kranal_init_msg(&conn->rac_msg, RANAL_MSG_CLOSE); - rc = kranal_sendmsg(conn, &conn->rac_msg, NULL, 0); - if (rc != 0) - return; - - conn->rac_close_sent = 1; - if (!conn->rac_close_recvd) - return; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (conn->rac_state == RANAL_CONN_CLOSING) - kranal_terminate_conn_locked(conn); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - return; - } - - spin_lock_irqsave(&conn->rac_lock, flags); - - if (list_empty(&conn->rac_fmaq)) { - - spin_unlock_irqrestore(&conn->rac_lock, flags); - - if (time_after_eq(jiffies, - conn->rac_last_tx + conn->rac_keepalive * HZ)) { - CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n", - libcfs_nid2str(conn->rac_peer->rap_nid), conn, - (jiffies - conn->rac_last_tx)/HZ, conn->rac_keepalive); - kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP); - kranal_sendmsg(conn, &conn->rac_msg, NULL, 0); - } - return; - } - - tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list); - list_del(&tx->tx_list); - more_to_do = !list_empty(&conn->rac_fmaq); - - spin_unlock_irqrestore(&conn->rac_lock, flags); - - expect_reply = 0; - CDEBUG(D_NET, "sending regular msg: %p, type %02x, cookie "LPX64"\n", - tx, tx->tx_msg.ram_type, tx->tx_cookie); - switch (tx->tx_msg.ram_type) { - default: - LBUG(); - - case RANAL_MSG_IMMEDIATE: - rc = kranal_sendmsg(conn, &tx->tx_msg, - tx->tx_buffer, tx->tx_nob); - 
break; - - case RANAL_MSG_PUT_NAK: - case RANAL_MSG_PUT_DONE: - case RANAL_MSG_GET_NAK: - case RANAL_MSG_GET_DONE: - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - break; - - case RANAL_MSG_PUT_REQ: - rc = kranal_map_buffer(tx); - LASSERT (rc != -EAGAIN); - if (rc != 0) - break; - - tx->tx_msg.ram_u.putreq.raprm_cookie = tx->tx_cookie; - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - expect_reply = 1; - break; - - case RANAL_MSG_PUT_ACK: - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - expect_reply = 1; - break; - - case RANAL_MSG_GET_REQ: - rc = kranal_map_buffer(tx); - LASSERT (rc != -EAGAIN); - if (rc != 0) - break; - - tx->tx_msg.ram_u.get.ragm_cookie = tx->tx_cookie; - tx->tx_msg.ram_u.get.ragm_desc.rard_key = tx->tx_map_key; - tx->tx_msg.ram_u.get.ragm_desc.rard_addr.AddressBits = - (__u64)((unsigned long)tx->tx_buffer); - tx->tx_msg.ram_u.get.ragm_desc.rard_nob = tx->tx_nob; - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - expect_reply = 1; - break; - } - - if (rc == -EAGAIN) { - /* I need credits to send this. 
Replace tx at the head of the - * fmaq and I'll get rescheduled when credits appear */ - CDEBUG(D_NET, "EAGAIN on %p\n", conn); - spin_lock_irqsave(&conn->rac_lock, flags); - list_add(&tx->tx_list, &conn->rac_fmaq); - spin_unlock_irqrestore(&conn->rac_lock, flags); - return; - } - - if (!expect_reply || rc != 0) { - kranal_tx_done(tx, rc); - } else { - /* LASSERT(current) above ensures this doesn't race with reply - * processing */ - spin_lock_irqsave(&conn->rac_lock, flags); - list_add_tail(&tx->tx_list, &conn->rac_replyq); - tx->tx_qtime = jiffies; - spin_unlock_irqrestore(&conn->rac_lock, flags); - } - - if (more_to_do) { - CDEBUG(D_NET, "Rescheduling %p (more to do)\n", conn); - kranal_schedule_conn(conn); - } -} - -static inline void -kranal_swab_rdma_desc (kra_rdma_desc_t *d) -{ - __swab64s(&d->rard_key.Key); - __swab16s(&d->rard_key.Cookie); - __swab16s(&d->rard_key.MdHandle); - __swab32s(&d->rard_key.Flags); - __swab64s(&d->rard_addr.AddressBits); - __swab32s(&d->rard_nob); -} - -kra_tx_t * -kranal_match_reply(kra_conn_t *conn, int type, __u64 cookie) -{ - struct list_head *ttmp; - kra_tx_t *tx; - unsigned long flags; - - spin_lock_irqsave(&conn->rac_lock, flags); - - list_for_each(ttmp, &conn->rac_replyq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - CDEBUG(D_NET,"Checking %p %02x/"LPX64"\n", - tx, tx->tx_msg.ram_type, tx->tx_cookie); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_msg.ram_type != type) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CWARN("Unexpected type %x (%x expected) " - "matched reply from %s\n", - tx->tx_msg.ram_type, type, - libcfs_nid2str(conn->rac_peer->rap_nid)); - return NULL; - } - - list_del(&tx->tx_list); - spin_unlock_irqrestore(&conn->rac_lock, flags); - return tx; - } - - spin_unlock_irqrestore(&conn->rac_lock, flags); - CWARN("Unmatched reply %02x/"LPX64" from %s\n", - type, cookie, libcfs_nid2str(conn->rac_peer->rap_nid)); - return NULL; -} - -void -kranal_check_fma_rx (kra_conn_t *conn) -{ - 
unsigned long flags; - __u32 seq; - kra_tx_t *tx; - kra_msg_t *msg; - void *prefix; - RAP_RETURN rrc = RapkFmaGetPrefix(conn->rac_rihandle, &prefix); - kra_peer_t *peer = conn->rac_peer; - int rc = 0; - int repost = 1; - - if (rrc == RAP_NOT_DONE) - return; - - CDEBUG(D_NET, "RX on %p\n", conn); - - LASSERT (rrc == RAP_SUCCESS); - conn->rac_last_rx = jiffies; - seq = conn->rac_rx_seq++; - msg = (kra_msg_t *)prefix; - - /* stash message for portals callbacks they'll NULL - * rac_rxmsg if they consume it */ - LASSERT (conn->rac_rxmsg == NULL); - conn->rac_rxmsg = msg; - - if (msg->ram_magic != RANAL_MSG_MAGIC) { - if (__swab32(msg->ram_magic) != RANAL_MSG_MAGIC) { - CERROR("Unexpected magic %08x from %s\n", - msg->ram_magic, libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - __swab32s(&msg->ram_magic); - __swab16s(&msg->ram_version); - __swab16s(&msg->ram_type); - __swab64s(&msg->ram_srcnid); - __swab64s(&msg->ram_connstamp); - __swab32s(&msg->ram_seq); - - /* NB message type checked below; NOT here... 
*/ - switch (msg->ram_type) { - case RANAL_MSG_PUT_ACK: - kranal_swab_rdma_desc(&msg->ram_u.putack.rapam_desc); - break; - - case RANAL_MSG_GET_REQ: - kranal_swab_rdma_desc(&msg->ram_u.get.ragm_desc); - break; - - default: - break; - } - } - - if (msg->ram_version != RANAL_MSG_VERSION) { - CERROR("Unexpected protocol version %d from %s\n", - msg->ram_version, libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_srcnid != peer->rap_nid) { - CERROR("Unexpected peer %s from %s\n", - libcfs_nid2str(msg->ram_srcnid), - libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_connstamp != conn->rac_peer_connstamp) { - CERROR("Unexpected connstamp "LPX64"("LPX64 - " expected) from %s\n", - msg->ram_connstamp, conn->rac_peer_connstamp, - libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_seq != seq) { - CERROR("Unexpected sequence number %d(%d expected) from %s\n", - msg->ram_seq, seq, libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if ((msg->ram_type & RANAL_MSG_FENCE) != 0) { - /* This message signals RDMA completion... 
*/ - rrc = RapkFmaSyncWait(conn->rac_rihandle); - if (rrc != RAP_SUCCESS) { - CERROR("RapkFmaSyncWait failed: %d\n", rrc); - rc = -ENETDOWN; - goto out; - } - } - - if (conn->rac_close_recvd) { - CERROR("Unexpected message %d after CLOSE from %s\n", - msg->ram_type, libcfs_nid2str(conn->rac_peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_type == RANAL_MSG_CLOSE) { - CWARN("RX CLOSE from %s\n", libcfs_nid2str(conn->rac_peer->rap_nid)); - conn->rac_close_recvd = 1; - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (conn->rac_state == RANAL_CONN_ESTABLISHED) - kranal_close_conn_locked(conn, 0); - else if (conn->rac_state == RANAL_CONN_CLOSING && - conn->rac_close_sent) - kranal_terminate_conn_locked(conn); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - goto out; - } - - if (conn->rac_state != RANAL_CONN_ESTABLISHED) - goto out; - - switch (msg->ram_type) { - case RANAL_MSG_NOOP: - /* Nothing to do; just a keepalive */ - CDEBUG(D_NET, "RX NOOP on %p\n", conn); - break; - - case RANAL_MSG_IMMEDIATE: - CDEBUG(D_NET, "RX IMMEDIATE on %p\n", conn); - rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.immediate.raim_hdr, - msg->ram_srcnid, conn, 0); - repost = rc < 0; - break; - - case RANAL_MSG_PUT_REQ: - CDEBUG(D_NET, "RX PUT_REQ on %p\n", conn); - rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.putreq.raprm_hdr, - msg->ram_srcnid, conn, 1); - repost = rc < 0; - break; - - case RANAL_MSG_PUT_NAK: - CDEBUG(D_NET, "RX PUT_NAK on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); - kranal_tx_done(tx, -ENOENT); /* no match */ - break; - - case RANAL_MSG_PUT_ACK: - CDEBUG(D_NET, "RX PUT_ACK on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ, - msg->ram_u.putack.rapam_src_cookie); - if (tx == NULL) - break; - - kranal_rdma(tx, 
RANAL_MSG_PUT_DONE, - &msg->ram_u.putack.rapam_desc, - msg->ram_u.putack.rapam_desc.rard_nob, - msg->ram_u.putack.rapam_dst_cookie); - break; - - case RANAL_MSG_PUT_DONE: - CDEBUG(D_NET, "RX PUT_DONE on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_PUT_ACK, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); - kranal_tx_done(tx, 0); - break; - - case RANAL_MSG_GET_REQ: - CDEBUG(D_NET, "RX GET_REQ on %p\n", conn); - rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.get.ragm_hdr, - msg->ram_srcnid, conn, 1); - repost = rc < 0; - break; - - case RANAL_MSG_GET_NAK: - CDEBUG(D_NET, "RX GET_NAK on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); - kranal_tx_done(tx, -ENOENT); /* no match */ - break; - - case RANAL_MSG_GET_DONE: - CDEBUG(D_NET, "RX GET_DONE on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); -#if 0 - /* completion message should send rdma length if we ever allow - * GET truncation */ - lnet_set_reply_msg_len(kranal_data.kra_ni, tx->tx_lntmsg[1], ???); -#endif - kranal_tx_done(tx, 0); - break; - } - - out: - if (rc < 0) /* protocol/comms error */ - kranal_close_conn (conn, rc); - - if (repost && conn->rac_rxmsg != NULL) - kranal_consume_rxmsg(conn, NULL, 0); - - /* check again later */ - kranal_schedule_conn(conn); -} - -void -kranal_complete_closed_conn (kra_conn_t *conn) -{ - kra_tx_t *tx; - int nfma; - int nreplies; - - LASSERT (conn->rac_state == RANAL_CONN_CLOSED); - LASSERT (list_empty(&conn->rac_list)); - LASSERT (list_empty(&conn->rac_hashlist)); - - for (nfma = 0; 
!list_empty(&conn->rac_fmaq); nfma++) { - tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_tx_done(tx, -ECONNABORTED); - } - - LASSERT (list_empty(&conn->rac_rdmaq)); - - for (nreplies = 0; !list_empty(&conn->rac_replyq); nreplies++) { - tx = list_entry(conn->rac_replyq.next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_tx_done(tx, -ECONNABORTED); - } - - CWARN("Closed conn %p -> %s: nmsg %d nreplies %d\n", - conn, libcfs_nid2str(conn->rac_peer->rap_nid), nfma, nreplies); -} - -int -kranal_process_new_conn (kra_conn_t *conn) -{ - RAP_RETURN rrc; - - rrc = RapkCompleteSync(conn->rac_rihandle, 1); - if (rrc == RAP_SUCCESS) - return 0; - - LASSERT (rrc == RAP_NOT_DONE); - if (!time_after_eq(jiffies, conn->rac_last_tx + - conn->rac_timeout * HZ)) - return -EAGAIN; - - /* Too late */ - rrc = RapkCompleteSync(conn->rac_rihandle, 0); - LASSERT (rrc == RAP_SUCCESS); - return -ETIMEDOUT; -} - -int -kranal_scheduler (void *arg) -{ - kra_device_t *dev = (kra_device_t *)arg; - wait_queue_t wait; - char name[16]; - kra_conn_t *conn; - unsigned long flags; - unsigned long deadline; - unsigned long soonest; - int nsoonest; - long timeout; - struct list_head *tmp; - struct list_head *nxt; - int rc; - int dropped_lock; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kranal_sd_%02d", dev->rad_idx); - cfs_daemonize(name); - cfs_block_allsigs(); - - dev->rad_scheduler = current; - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&dev->rad_lock, flags); - - while (!kranal_data.kra_shutdown) { - /* Safe: kra_shutdown only set when quiescent */ - - if (busy_loops++ >= RANAL_RESCHED) { - spin_unlock_irqrestore(&dev->rad_lock, flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&dev->rad_lock, flags); - } - - dropped_lock = 0; - - if (dev->rad_ready) { - /* Device callback fired since I last checked it */ - dev->rad_ready = 0; - spin_unlock_irqrestore(&dev->rad_lock, flags); - dropped_lock = 1; 
- - kranal_check_rdma_cq(dev); - kranal_check_fma_cq(dev); - - spin_lock_irqsave(&dev->rad_lock, flags); - } - - list_for_each_safe(tmp, nxt, &dev->rad_ready_conns) { - conn = list_entry(tmp, kra_conn_t, rac_schedlist); - - list_del_init(&conn->rac_schedlist); - LASSERT (conn->rac_scheduled); - conn->rac_scheduled = 0; - spin_unlock_irqrestore(&dev->rad_lock, flags); - dropped_lock = 1; - - kranal_check_fma_rx(conn); - kranal_process_fmaq(conn); - - if (conn->rac_state == RANAL_CONN_CLOSED) - kranal_complete_closed_conn(conn); - - kranal_conn_decref(conn); - spin_lock_irqsave(&dev->rad_lock, flags); - } - - nsoonest = 0; - soonest = jiffies; - - list_for_each_safe(tmp, nxt, &dev->rad_new_conns) { - conn = list_entry(tmp, kra_conn_t, rac_schedlist); - - deadline = conn->rac_last_tx + conn->rac_keepalive; - if (time_after_eq(jiffies, deadline)) { - /* Time to process this new conn */ - spin_unlock_irqrestore(&dev->rad_lock, flags); - dropped_lock = 1; - - rc = kranal_process_new_conn(conn); - if (rc != -EAGAIN) { - /* All done with this conn */ - spin_lock_irqsave(&dev->rad_lock, flags); - list_del_init(&conn->rac_schedlist); - spin_unlock_irqrestore(&dev->rad_lock, flags); - - kranal_conn_decref(conn); - spin_lock_irqsave(&dev->rad_lock, flags); - continue; - } - - /* retry with exponential backoff until HZ */ - if (conn->rac_keepalive == 0) - conn->rac_keepalive = 1; - else if (conn->rac_keepalive <= HZ) - conn->rac_keepalive *= 2; - else - conn->rac_keepalive += HZ; - - deadline = conn->rac_last_tx + conn->rac_keepalive; - spin_lock_irqsave(&dev->rad_lock, flags); - } - - /* Does this conn need attention soonest? 
*/ - if (nsoonest++ == 0 || - !time_after_eq(deadline, soonest)) - soonest = deadline; - } - - if (dropped_lock) /* may sleep iff I didn't drop the lock */ - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&dev->rad_waitq, &wait); - spin_unlock_irqrestore(&dev->rad_lock, flags); - - if (nsoonest == 0) { - busy_loops = 0; - schedule(); - } else { - timeout = (long)(soonest - jiffies); - if (timeout > 0) { - busy_loops = 0; - schedule_timeout(timeout); - } - } - - remove_wait_queue(&dev->rad_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&dev->rad_lock, flags); - } - - spin_unlock_irqrestore(&dev->rad_lock, flags); - - dev->rad_scheduler = NULL; - kranal_thread_fini(); - return 0; -} diff --git a/lnet/klnds/ralnd/ralnd_modparams.c b/lnet/klnds/ralnd/ralnd_modparams.c deleted file mode 100644 index e6ee1bddc5b3b919b5bbf88fff8700d2ef6ed8eb..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd_modparams.c +++ /dev/null @@ -1,196 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "ralnd.h" - -static int n_connd = 4; -CFS_MODULE_PARM(n_connd, "i", int, 0444, - "# of connection daemons"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of transmit descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 32; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int fma_cq_size = 8192; -CFS_MODULE_PARM(fma_cq_size, "i", int, 0444, - "size of the completion queue"); - -static int timeout = 30; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "communications timeout (seconds)"); - -static int max_immediate = (2<<10); -CFS_MODULE_PARM(max_immediate, "i", int, 0644, - "immediate/RDMA breakpoint"); - -kra_tunables_t kranal_tunables = { - .kra_n_connd = &n_connd, - .kra_min_reconnect_interval = &min_reconnect_interval, - .kra_max_reconnect_interval = &max_reconnect_interval, - .kra_ntx = &ntx, - .kra_credits = &credits, - .kra_peercredits = &peer_credits, - .kra_fma_cq_size = &fma_cq_size, - .kra_timeout = &timeout, - .kra_max_immediate = &max_immediate, -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static cfs_sysctl_table_t kranal_ctl_table[] = { - { - .ctl_name = 1, - .procname = "n_connd", - .data = &n_connd, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 2, - .procname = "min_reconnect_interval", - .data = &min_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 3, - .procname = "max_reconnect_interval", - .data = &max_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - 
.proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 5, - .procname = "credits", - .data = &credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "peer_credits", - .data = &peer_credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "fma_cq_size", - .data = &fma_cq_size, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 8, - .procname = "timeout", - .data = &timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 9, - .procname = "max_immediate", - .data = &max_immediate, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - {0} -}; - -static cfs_sysctl_table_t kranal_top_ctl_table[] = { - { - .ctl_name = 202, - .procname = "ranal", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kranal_ctl_table - }, - {0} -}; - -int -kranal_tunables_init () -{ - kranal_tunables.kra_sysctl = - cfs_register_sysctl_table(kranal_top_ctl_table, 0); - - if (kranal_tunables.kra_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kranal_tunables_fini () -{ - if (kranal_tunables.kra_sysctl != NULL) - cfs_unregister_sysctl_table(kranal_tunables.kra_sysctl); -} - -#else - -int -kranal_tunables_init () -{ - return 0; -} - -void -kranal_tunables_fini () -{ -} - -#endif - diff --git a/lnet/klnds/socklnd/.cvsignore b/lnet/klnds/socklnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git 
a/lnet/klnds/socklnd/Info.plist b/lnet/klnds/socklnd/Info.plist deleted file mode 100644 index f5a5460ad280753fcd0c15f66e70428143676416..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/Info.plist +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>ksocklnd</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.ksocklnd</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - <string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - <key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - <string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - <key>com.clusterfs.lustre.libcfs</key> - <string>1.0.0</string> - <key>com.clusterfs.lustre.lnet</key> - <string>1.0.0</string> - </dict> -</dict> -</plist> diff --git a/lnet/klnds/socklnd/Makefile.in b/lnet/klnds/socklnd/Makefile.in deleted file mode 100644 index 3a6c3f78043a9108a9fc0cc1c0df5ed763149d46..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/Makefile.in +++ /dev/null @@ -1,5 +0,0 @@ -MODULES := ksocklnd - -ksocklnd-objs := socklnd.o socklnd_cb.o socklnd_modparams.o socklnd_lib-linux.o - -@INCLUDE_RULES@ diff --git a/lnet/klnds/socklnd/autoMakefile.am b/lnet/klnds/socklnd/autoMakefile.am deleted file mode 100644 index 
0dbe69756ca6f4383fe31b80714bb909d4e303ff..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/autoMakefile.am +++ /dev/null @@ -1,30 +0,0 @@ -if MODULES -if LINUX - - modulenet_DATA := ksocklnd$(KMODEXT) - -endif # LINUX -endif # MODULES - -DIST_SOURCES := $(ksocklnd-objs:%.o=%.c) socklnd_lib-linux.h socklnd.h - -if DARWIN - - macos_PROGRAMS := ksocklnd - - nodist_ksocklnd_SOURCES := socklnd.c socklnd_cb.c socklnd_modparams.c socklnd_lib-darwin.c - DIST_SOURCES += socklnd_lib-darwin.c socklnd_lib-darwin.h - - ksocklnd_CFLAGS := $(EXTRA_KCFLAGS) - ksocklnd_LDFLAGS := $(EXTRA_KLDFLAGS) - ksocklnd_LDADD := $(EXTRA_KLIBS) - - plist_DATA := Info.plist - install_data_hook := fix-kext-ownership - -endif # DARWIN - -EXTRA_DIST := $(plist_DATA) -install-data-hook: $(install_data_hook) - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ socklnd_lib.c diff --git a/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj b/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj deleted file mode 100644 index cab8b433f3a1b3509794dd72a68307b8ebed8b0d..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj +++ /dev/null @@ -1,287 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = ksocknal; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 1957C5680737C71F00425049 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = socknal.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1957C5690737C71F00425049 = { - fileRef = 1957C5680737C71F00425049; - isa = PBXBuildFile; - settings = { - }; - }; - 1957C56A0737C72F00425049 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = socknal_cb.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1957C56B0737C72F00425049 = { - fileRef = 
1957C56A0737C72F00425049; - isa = PBXBuildFile; - settings = { - }; - }; - 1957C5B20737C78E00425049 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = socknal_lib.c; - path = arch/xnu/socknal_lib.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1957C5B30737C78E00425049 = { - fileRef = 1957C5B20737C78E00425049; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 1957C5B20737C78E00425049, - 1957C56A0737C72F00425049, - 1957C5680737C71F00425049, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, - 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = "../../include ./arch/xnu"; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.knals.ksocknal; - MODULE_START = ksocknal_start; - MODULE_STOP = ksocknal_stop; - MODULE_VERSION = 1.0.1; - OTHER_CFLAGS = "-D__KERNEL__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = ksocknal; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = ksocknal; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = ksocknal; 
- productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 1957C5690737C71F00425049, - 1957C56B0737C72F00425049, - 1957C5B30737C78E00425049, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = ksocknal.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c deleted file mode 100644 index 710ebd2e94fc82333879432d03f041c13ecd45a3..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd.c +++ /dev/null @@ -1,2587 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "socklnd.h" - -lnd_t the_ksocklnd = { - .lnd_type = SOCKLND, - .lnd_startup = ksocknal_startup, - .lnd_shutdown = ksocknal_shutdown, - .lnd_ctl = ksocknal_ctl, - .lnd_send = ksocknal_send, - .lnd_recv = ksocknal_recv, - .lnd_notify = ksocknal_notify, - .lnd_accept = ksocknal_accept, -}; - -ksock_nal_data_t ksocknal_data; - -ksock_interface_t * -ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip) -{ - ksock_net_t *net = ni->ni_data; - int i; - ksock_interface_t *iface; - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(i < LNET_MAX_INTERFACES); - iface = &net->ksnn_interfaces[i]; - - if (iface->ksni_ipaddr == ip) - return (iface); - } - - return (NULL); -} - -ksock_route_t * -ksocknal_create_route (__u32 ipaddr, int port) -{ - ksock_route_t *route; - - LIBCFS_ALLOC (route, sizeof (*route)); - if (route == NULL) - return (NULL); - - atomic_set (&route->ksnr_refcount, 1); - route->ksnr_peer = NULL; - route->ksnr_retry_interval = 0; /* OK to connect at any time */ - route->ksnr_ipaddr = ipaddr; - route->ksnr_port = port; - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - route->ksnr_connected = 0; - route->ksnr_deleted = 0; - route->ksnr_conn_count = 0; - route->ksnr_share_count = 0; - - return (route); -} - -void -ksocknal_destroy_route (ksock_route_t *route) -{ - LASSERT (atomic_read(&route->ksnr_refcount) == 0); - - if (route->ksnr_peer != NULL) - ksocknal_peer_decref(route->ksnr_peer); - - LIBCFS_FREE (route, sizeof (*route)); -} - -int -ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) -{ - ksock_net_t *net = ni->ni_data; - ksock_peer_t *peer; - - LASSERT (id.nid != LNET_NID_ANY); - LASSERT (id.pid != LNET_PID_ANY); - LASSERT (!in_interrupt()); - - LIBCFS_ALLOC (peer, sizeof (*peer)); - if (peer == NULL) - return -ENOMEM; - - memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */ - - peer->ksnp_ni = ni; - peer->ksnp_id = id; - atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */ - 
peer->ksnp_closing = 0; - peer->ksnp_accepting = 0; - peer->ksnp_zc_next_cookie = 1; - peer->ksnp_proto = NULL; - CFS_INIT_LIST_HEAD (&peer->ksnp_conns); - CFS_INIT_LIST_HEAD (&peer->ksnp_routes); - CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue); - CFS_INIT_LIST_HEAD (&peer->ksnp_zc_req_list); - spin_lock_init(&peer->ksnp_lock); - - spin_lock_bh (&net->ksnn_lock); - - if (net->ksnn_shutdown) { - spin_unlock_bh (&net->ksnn_lock); - - LIBCFS_FREE(peer, sizeof(*peer)); - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - net->ksnn_npeers++; - - spin_unlock_bh (&net->ksnn_lock); - - *peerp = peer; - return 0; -} - -void -ksocknal_destroy_peer (ksock_peer_t *peer) -{ - ksock_net_t *net = peer->ksnp_ni->ni_data; - - CDEBUG (D_NET, "peer %s %p deleted\n", - libcfs_id2str(peer->ksnp_id), peer); - - LASSERT (atomic_read (&peer->ksnp_refcount) == 0); - LASSERT (peer->ksnp_accepting == 0); - LASSERT (list_empty (&peer->ksnp_conns)); - LASSERT (list_empty (&peer->ksnp_routes)); - LASSERT (list_empty (&peer->ksnp_tx_queue)); - LASSERT (list_empty (&peer->ksnp_zc_req_list)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections and routes keep a reference on their peer - * until they are destroyed, so we can be assured that _all_ state to - * do with this peer has been cleaned up when its refcount drops to - * zero. 
*/ - spin_lock_bh (&net->ksnn_lock); - net->ksnn_npeers--; - spin_unlock_bh (&net->ksnn_lock); -} - -ksock_peer_t * -ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id) -{ - struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); - struct list_head *tmp; - ksock_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, ksock_peer_t, ksnp_list); - - LASSERT (!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - if (peer->ksnp_id.nid != id.nid || - peer->ksnp_id.pid != id.pid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_id2str(id), - atomic_read(&peer->ksnp_refcount)); - return (peer); - } - return (NULL); -} - -ksock_peer_t * -ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id) -{ - ksock_peer_t *peer; - - read_lock (&ksocknal_data.ksnd_global_lock); - peer = ksocknal_find_peer_locked (ni, id); - if (peer != NULL) /* +1 ref for caller? */ - ksocknal_peer_addref(peer); - read_unlock (&ksocknal_data.ksnd_global_lock); - - return (peer); -} - -void -ksocknal_unlink_peer_locked (ksock_peer_t *peer) -{ - int i; - __u32 ip; - ksock_interface_t *iface; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) { - LASSERT (i < LNET_MAX_INTERFACES); - ip = peer->ksnp_passive_ips[i]; - - iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - /* All IPs in peer->ksnp_passive_ips[] come from the - * interface list, therefore the call must succeed. 
*/ - LASSERT (iface != NULL); - - CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n", - peer, iface, iface->ksni_nroutes); - iface->ksni_npeers--; - } - - LASSERT (list_empty(&peer->ksnp_conns)); - LASSERT (list_empty(&peer->ksnp_routes)); - LASSERT (!peer->ksnp_closing); - peer->ksnp_closing = 1; - list_del (&peer->ksnp_list); - /* lose peerlist's ref */ - ksocknal_peer_decref(peer); -} - -int -ksocknal_get_peer_info (lnet_ni_t *ni, int index, - lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port, - int *conn_count, int *share_count) -{ - ksock_peer_t *peer; - struct list_head *ptmp; - ksock_route_t *route; - struct list_head *rtmp; - int i; - int j; - int rc = -ENOENT; - - read_lock (&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - - list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (peer->ksnp_n_passive_ips == 0 && - list_empty(&peer->ksnp_routes)) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = 0; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - for (j = 0; j < peer->ksnp_n_passive_ips; j++) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = peer->ksnp_passive_ips[j]; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - list_for_each (rtmp, &peer->ksnp_routes) { - if (index-- > 0) - continue; - - route = list_entry(rtmp, ksock_route_t, - ksnr_list); - - *id = peer->ksnp_id; - *myip = route->ksnr_myipaddr; - *peer_ip = route->ksnr_ipaddr; - *port = route->ksnr_port; - *conn_count = route->ksnr_conn_count; - *share_count = route->ksnr_share_count; - rc = 0; - goto out; - } - } - } - out: - read_unlock (&ksocknal_data.ksnd_global_lock); - return (rc); -} - -void -ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) -{ - ksock_peer_t *peer = 
route->ksnr_peer; - int type = conn->ksnc_type; - ksock_interface_t *iface; - - conn->ksnc_route = route; - ksocknal_route_addref(route); - - if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { - if (route->ksnr_myipaddr == 0) { - /* route wasn't bound locally yet (the initial route) */ - CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(conn->ksnc_myipaddr)); - } else { - CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from " - "%u.%u.%u.%u to %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(route->ksnr_myipaddr), - HIPQUAD(conn->ksnc_myipaddr)); - - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes--; - } - route->ksnr_myipaddr = conn->ksnc_myipaddr; - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes++; - } - - route->ksnr_connected |= (1<<type); - route->ksnr_conn_count++; - - /* Successful connection => further attempts can - * proceed immediately */ - route->ksnr_retry_interval = 0; -} - -void -ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) -{ - struct list_head *tmp; - ksock_conn_t *conn; - ksock_route_t *route2; - - LASSERT (!peer->ksnp_closing); - LASSERT (route->ksnr_peer == NULL); - LASSERT (!route->ksnr_scheduled); - LASSERT (!route->ksnr_connecting); - LASSERT (route->ksnr_connected == 0); - - /* LASSERT(unique) */ - list_for_each(tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { - CERROR ("Duplicate route %s %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr)); - LBUG(); - } - } - - route->ksnr_peer = peer; - ksocknal_peer_addref(peer); - /* peer's routelist takes over my ref on 'route' */ - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - - list_for_each(tmp, 
&peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_ipaddr != route->ksnr_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - /* keep going (typed routes) */ - } -} - -void -ksocknal_del_route_locked (ksock_route_t *route) -{ - ksock_peer_t *peer = route->ksnr_peer; - ksock_interface_t *iface; - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - - LASSERT (!route->ksnr_deleted); - - /* Close associated conns */ - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_route != route) - continue; - - ksocknal_close_conn_locked (conn, 0); - } - - if (route->ksnr_myipaddr != 0) { - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes--; - } - - route->ksnr_deleted = 1; - list_del (&route->ksnr_list); - ksocknal_route_decref(route); /* drop peer's ref */ - - if (list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns)) { - /* I've just removed the last route to a peer with no active - * connections */ - ksocknal_unlink_peer_locked (peer); - } -} - -int -ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) -{ - struct list_head *tmp; - ksock_peer_t *peer; - ksock_peer_t *peer2; - ksock_route_t *route; - ksock_route_t *route2; - int rc; - - if (id.nid == LNET_NID_ANY || - id.pid == LNET_PID_ANY) - return (-EINVAL); - - /* Have a brand new peer ready... 
*/ - rc = ksocknal_create_peer(&peer, ni, id); - if (rc != 0) - return rc; - - route = ksocknal_create_route (ipaddr, port); - if (route == NULL) { - ksocknal_peer_decref(peer); - return (-ENOMEM); - } - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - /* always called with a ref on ni, so shutdown can't have started */ - LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); - - peer2 = ksocknal_find_peer_locked (ni, id); - if (peer2 != NULL) { - ksocknal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes my ref on peer */ - list_add_tail (&peer->ksnp_list, - ksocknal_nid2peerlist (id.nid)); - } - - route2 = NULL; - list_for_each (tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; - } - if (route2 == NULL) { - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; - } else { - ksocknal_route_decref(route); - route2->ksnr_share_count++; - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (0); -} - -void -ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) -{ - ksock_conn_t *conn; - ksock_route_t *route; - struct list_head *tmp; - struct list_head *nxt; - int nshared; - - LASSERT (!peer->ksnp_closing); - - /* Extra ref prevents peer disappearing until I'm done with it */ - ksocknal_peer_addref(peer); - - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - - /* no match */ - if (!(ip == 0 || route->ksnr_ipaddr == ip)) - continue; - - route->ksnr_share_count = 0; - /* This deletes associated conns too */ - ksocknal_del_route_locked (route); - } - - nshared = 0; - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - nshared += route->ksnr_share_count; - } - - if (nshared == 0) { - /* remove everything else if there are no explicit entries - * left */ - - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) 
{ - route = list_entry(tmp, ksock_route_t, ksnr_list); - - /* we should only be removing auto-entries */ - LASSERT(route->ksnr_share_count == 0); - ksocknal_del_route_locked (route); - } - - list_for_each_safe (tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - ksocknal_close_conn_locked(conn, 0); - } - } - - ksocknal_peer_decref(peer); - /* NB peer unlinks itself when last conn/route is removed */ -} - -int -ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) -{ - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - ksock_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) - lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; - else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) && - (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid))) - continue; - - ksocknal_peer_addref(peer); /* a ref for me... */ - - ksocknal_del_peer_locked (peer, ip); - - if (peer->ksnp_closing && !list_empty(&peer->ksnp_tx_queue)) { - LASSERT (list_empty(&peer->ksnp_conns)); - LASSERT (list_empty(&peer->ksnp_routes)); - - list_splice_init(&peer->ksnp_tx_queue, &zombies); - } - - ksocknal_peer_decref(peer); /* ...till here */ - - rc = 0; /* matched! 
*/ - } - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - ksocknal_txlist_done(ni, &zombies, 1); - - return (rc); -} - -ksock_conn_t * -ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index) -{ - ksock_peer_t *peer; - struct list_head *ptmp; - ksock_conn_t *conn; - struct list_head *ctmp; - int i; - - read_lock (&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - LASSERT (!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - list_for_each (ctmp, &peer->ksnp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - ksocknal_conn_addref(conn); - read_unlock (&ksocknal_data.ksnd_global_lock); - return (conn); - } - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return (NULL); -} - -ksock_sched_t * -ksocknal_choose_scheduler_locked (unsigned int irq) -{ - ksock_sched_t *sched; - ksock_irqinfo_t *info; - int i; - - LASSERT (irq < NR_IRQS); - info = &ksocknal_data.ksnd_irqinfo[irq]; - - if (irq != 0 && /* hardware NIC */ - info->ksni_valid) { /* already set up */ - return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]); - } - - /* software NIC (irq == 0) || not associated with a scheduler yet. - * Choose the CPU with the fewest connections... */ - sched = &ksocknal_data.ksnd_schedulers[0]; - for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++) - if (sched->kss_nconns > - ksocknal_data.ksnd_schedulers[i].kss_nconns) - sched = &ksocknal_data.ksnd_schedulers[i]; - - if (irq != 0) { /* Hardware NIC */ - info->ksni_valid = 1; - info->ksni_sched = sched - ksocknal_data.ksnd_schedulers; - - /* no overflow... 
*/ - LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers); - } - - return (sched); -} - -int -ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs) -{ - ksock_net_t *net = ni->ni_data; - int i; - int nip; - - read_lock (&ksocknal_data.ksnd_global_lock); - - nip = net->ksnn_ninterfaces; - LASSERT (nip <= LNET_MAX_INTERFACES); - - /* Only offer interfaces for additional connections if I have - * more than one. */ - if (nip < 2) { - read_unlock (&ksocknal_data.ksnd_global_lock); - return 0; - } - - for (i = 0; i < nip; i++) { - ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; - LASSERT (ipaddrs[i] != 0); - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return (nip); -} - -int -ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) -{ - int best_netmatch = 0; - int best_xor = 0; - int best = -1; - int this_xor; - int this_netmatch; - int i; - - for (i = 0; i < nips; i++) { - if (ips[i] == 0) - continue; - - this_xor = (ips[i] ^ iface->ksni_ipaddr); - this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0; - - if (!(best < 0 || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_xor > this_xor))) - continue; - - best = i; - best_netmatch = this_netmatch; - best_xor = this_xor; - } - - LASSERT (best >= 0); - return (best); -} - -int -ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - ksock_net_t *net = peer->ksnp_ni->ni_data; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int n_ips; - int i; - int j; - int k; - __u32 ip; - __u32 xor; - int this_netmatch; - int best_netmatch; - int best_npeers; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. 
We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness shouldn't matter */ - - /* Also note that I'm not going to return more than n_peerips - * interfaces, even if I have more myself */ - - write_lock_bh (global_lock); - - LASSERT (n_peerips <= LNET_MAX_INTERFACES); - LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* Only match interfaces for additional connections - * if I have > 1 interface */ - n_ips = (net->ksnn_ninterfaces < 2) ? 0 : - MIN(n_peerips, net->ksnn_ninterfaces); - - for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) { - /* ^ yes really... */ - - /* If we have any new interfaces, first tick off all the - * peer IPs that match old interfaces, then choose new - * interfaces to match the remaining peer IPS. - * We don't forget interfaces we've stopped using; we might - * start using them again... */ - - if (i < peer->ksnp_n_passive_ips) { - /* Old interface. */ - ip = peer->ksnp_passive_ips[i]; - best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - - /* peer passive ips are kept up to date */ - LASSERT(best_iface != NULL); - } else { - /* choose a new interface */ - LASSERT (i == peer->ksnp_n_passive_ips); - - best_iface = NULL; - best_netmatch = 0; - best_npeers = 0; - - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - ip = iface->ksni_ipaddr; - - for (k = 0; k < peer->ksnp_n_passive_ips; k++) - if (peer->ksnp_passive_ips[k] == ip) - break; - - if (k < peer->ksnp_n_passive_ips) /* using it already */ - continue; - - k = ksocknal_match_peerip(iface, peerips, n_peerips); - xor = (ip ^ peerips[k]); - this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 
1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_npeers > iface->ksni_npeers))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_npeers = iface->ksni_npeers; - } - - best_iface->ksni_npeers++; - ip = best_iface->ksni_ipaddr; - peer->ksnp_passive_ips[i] = ip; - peer->ksnp_n_passive_ips = i+1; - } - - LASSERT (best_iface != NULL); - - /* mark the best matching peer IP used */ - j = ksocknal_match_peerip(best_iface, peerips, n_peerips); - peerips[j] = 0; - } - - /* Overwrite input peer IP addresses */ - memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); - - write_unlock_bh (global_lock); - - return (n_ips); -} - -void -ksocknal_create_routes(ksock_peer_t *peer, int port, - __u32 *peer_ipaddrs, int npeer_ipaddrs) -{ - ksock_route_t *newroute = NULL; - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - lnet_ni_t *ni = peer->ksnp_ni; - ksock_net_t *net = ni->ni_data; - struct list_head *rtmp; - ksock_route_t *route; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int best_netmatch; - int this_netmatch; - int best_nroutes; - int i; - int j; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. 
We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness here shouldn't matter */ - - write_lock_bh (global_lock); - - if (net->ksnn_ninterfaces < 2) { - /* Only create additional connections - * if I have > 1 interface */ - write_unlock_bh (global_lock); - return; - } - - LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES); - - for (i = 0; i < npeer_ipaddrs; i++) { - if (newroute != NULL) { - newroute->ksnr_ipaddr = peer_ipaddrs[i]; - } else { - write_unlock_bh (global_lock); - - newroute = ksocknal_create_route(peer_ipaddrs[i], port); - if (newroute == NULL) - return; - - write_lock_bh (global_lock); - } - - if (peer->ksnp_closing) { - /* peer got closed under me */ - break; - } - - /* Already got a route? */ - route = NULL; - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - best_iface = NULL; - best_nroutes = 0; - best_netmatch = 0; - - LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* Select interface to connect from */ - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - - /* Using this interface already? */ - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr == iface->ksni_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - this_netmatch = (((iface->ksni_ipaddr ^ - newroute->ksnr_ipaddr) & - iface->ksni_netmask) == 0) ? 
1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_nroutes > iface->ksni_nroutes))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_nroutes = iface->ksni_nroutes; - } - - if (best_iface == NULL) - continue; - - newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; - best_iface->ksni_nroutes++; - - ksocknal_add_route_locked(peer, newroute); - newroute = NULL; - } - - write_unlock_bh (global_lock); - if (newroute != NULL) - ksocknal_route_decref(newroute); -} - -int -ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock) -{ - ksock_connreq_t *cr; - int rc; - __u32 peer_ip; - int peer_port; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT (rc == 0); /* we succeeded before */ - - LIBCFS_ALLOC(cr, sizeof(*cr)); - if (cr == NULL) { - LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from " - "%u.%u.%u.%u: memory exhausted\n", - HIPQUAD(peer_ip)); - return -ENOMEM; - } - - lnet_ni_addref(ni); - cr->ksncr_ni = ni; - cr->ksncr_sock = sock; - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); - cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - return 0; -} - -int -ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr) -{ - ksock_route_t *route; - - list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) { - - if (route->ksnr_ipaddr == ipaddr) - return route->ksnr_connecting; - } - return 0; -} - -int -ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, - cfs_socket_t *sock, int type) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - CFS_LIST_HEAD (zombies); - lnet_process_id_t peerid; - struct list_head *tmp; - __u64 incarnation; - ksock_conn_t *conn; - ksock_conn_t *conn2; - ksock_peer_t *peer = NULL; - ksock_peer_t *peer2; - ksock_sched_t *sched; - ksock_hello_msg_t *hello; - unsigned int irq; - ksock_tx_t *tx; 
- int rc; - int active; - char *warn = NULL; - - active = (route != NULL); - - LASSERT (active == (type != SOCKLND_CONN_NONE)); - - irq = ksocknal_lib_sock_irq (sock); - - LIBCFS_ALLOC(conn, sizeof(*conn)); - if (conn == NULL) { - rc = -ENOMEM; - goto failed_0; - } - - memset (conn, 0, sizeof (*conn)); - conn->ksnc_peer = NULL; - conn->ksnc_route = NULL; - conn->ksnc_sock = sock; - /* 2 ref, 1 for conn, another extra ref prevents socket - * being closed before establishment of connection */ - atomic_set (&conn->ksnc_sock_refcount, 2); - conn->ksnc_type = type; - ksocknal_lib_save_callback(sock, conn); - atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ - - conn->ksnc_zc_capable = ksocknal_lib_zc_capable(sock); - conn->ksnc_rx_ready = 0; - conn->ksnc_rx_scheduled = 0; - - CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue); - conn->ksnc_tx_ready = 0; - conn->ksnc_tx_scheduled = 0; - conn->ksnc_tx_mono = NULL; - atomic_set (&conn->ksnc_tx_nob, 0); - - LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - if (hello == NULL) { - rc = -ENOMEM; - goto failed_1; - } - - /* stash conn's local and remote addrs */ - rc = ksocknal_lib_get_conn_addrs (conn); - if (rc != 0) - goto failed_1; - - /* Find out/confirm peer's NID and connection type and get the - * vector of interfaces she's willing to let me connect to. 
- * Passive connections use the listener timeout since the peer sends - * eagerly */ - - if (active) { - peer = route->ksnr_peer; - LASSERT(ni == peer->ksnp_ni); - - /* Active connection sends HELLO eagerly */ - hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); - peerid = peer->ksnp_id; - - write_lock_bh(global_lock); - conn->ksnc_proto = peer->ksnp_proto; - write_unlock_bh(global_lock); - - if (conn->ksnc_proto == NULL) { - conn->ksnc_proto = &ksocknal_protocol_v2x; -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol != 2) - conn->ksnc_proto = &ksocknal_protocol_v1x; -#endif - } - - rc = ksocknal_send_hello (ni, conn, peerid.nid, hello); - if (rc != 0) - goto failed_1; - } else { - peerid.nid = LNET_NID_ANY; - peerid.pid = LNET_PID_ANY; - - /* Passive, get protocol from peer */ - conn->ksnc_proto = NULL; - } - - rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation); - if (rc < 0) - goto failed_1; - - LASSERT (rc == 0 || active); - LASSERT (conn->ksnc_proto != NULL); - LASSERT (peerid.nid != LNET_NID_ANY); - - if (active) { - ksocknal_peer_addref(peer); - write_lock_bh (global_lock); - } else { - rc = ksocknal_create_peer(&peer, ni, peerid); - if (rc != 0) - goto failed_1; - - write_lock_bh (global_lock); - - /* called with a ref on ni, so shutdown can't have started */ - LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); - - peer2 = ksocknal_find_peer_locked(ni, peerid); - if (peer2 == NULL) { - /* NB this puts an "empty" peer in the peer - * table (which takes my ref) */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); - } else { - ksocknal_peer_decref(peer); - peer = peer2; - } - - /* +1 ref for me */ - ksocknal_peer_addref(peer); - peer->ksnp_accepting++; - - /* Am I already connecting to this guy? Resolve in - * favour of higher NID... 
*/ - if (peerid.nid < ni->ni_nid && - ksocknal_connecting(peer, conn->ksnc_ipaddr)) { - rc = EALREADY; - warn = "connection race resolution"; - goto failed_2; - } - } - - if (peer->ksnp_closing || - (active && route->ksnr_deleted)) { - /* peer/route got closed under me */ - rc = -ESTALE; - warn = "peer/route removed"; - goto failed_2; - } - - if (peer->ksnp_proto == NULL) { - /* Never connected before. - * NB recv_hello may have returned EPROTO to signal my peer - * wants a different protocol than the one I asked for. - */ - LASSERT (list_empty(&peer->ksnp_conns)); - - peer->ksnp_proto = conn->ksnc_proto; - peer->ksnp_incarnation = incarnation; - } - - if (peer->ksnp_proto != conn->ksnc_proto || - peer->ksnp_incarnation != incarnation) { - /* Peer rebooted or I've got the wrong protocol version */ - ksocknal_close_peer_conns_locked(peer, 0, 0); - - peer->ksnp_proto = NULL; - rc = ESTALE; - warn = peer->ksnp_incarnation != incarnation ? - "peer rebooted" : - "wrong proto version"; - goto failed_2; - } - - switch (rc) { - default: - LBUG(); - case 0: - break; - case EALREADY: - warn = "lost conn race"; - goto failed_2; - case EPROTO: - warn = "retry with different protocol version"; - goto failed_2; - } - - /* Refuse to duplicate an existing connection, unless this is a - * loopback connection */ - if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || - conn2->ksnc_myipaddr != conn->ksnc_myipaddr || - conn2->ksnc_type != conn->ksnc_type) - continue; - - /* Reply on a passive connection attempt so the peer - * realises we're connected. */ - LASSERT (rc == 0); - if (!active) - rc = EALREADY; - - warn = "duplicate"; - goto failed_2; - } - } - - /* If the connection created by this route didn't bind to the IP - * address the route connected to, the connection/route matching - * code below probably isn't going to work. 
*/ - if (active && - route->ksnr_ipaddr != conn->ksnc_ipaddr) { - CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(conn->ksnc_ipaddr)); - } - - /* Search for a route corresponding to the new connection and - * create an association. This allows incoming connections created - * by routes in my peer to match my own route entries so I don't - * continually create duplicate routes. */ - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - break; - } - - conn->ksnc_peer = peer; /* conn takes my ref on peer */ - peer->ksnp_last_alive = cfs_time_current(); - peer->ksnp_error = 0; - - sched = ksocknal_choose_scheduler_locked (irq); - sched->kss_nconns++; - conn->ksnc_scheduler = sched; - - /* Set the deadline for the outgoing HELLO to drain */ - conn->ksnc_tx_bufnob = SOCK_WMEM_QUEUED(sock); - conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with adding to peer's conn list */ - - list_add (&conn->ksnc_list, &peer->ksnp_conns); - ksocknal_conn_addref(conn); - - ksocknal_new_packet(conn, 0); - - /* Take all the packets blocking for a connection. - * NB, it might be nicer to share these blocked packets among any - * other connections that are becoming established. */ - while (!list_empty (&peer->ksnp_tx_queue)) { - tx = list_entry (peer->ksnp_tx_queue.next, - ksock_tx_t, tx_list); - - list_del (&tx->tx_list); - ksocknal_queue_tx_locked (tx, conn); - } - - write_unlock_bh (global_lock); - - /* We've now got a new connection. Any errors from here on are just - * like "normal" comms errors and we close the connection normally. - * NB (a) we still have to send the reply HELLO for passive - * connections, - * (b) normal I/O on the conn is blocked until I setup and call the - * socket callbacks. 
- */ - - ksocknal_lib_bind_irq (irq); - - CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d" - " incarnation:"LPD64" sched[%d]/%d\n", - libcfs_id2str(peerid), conn->ksnc_proto->pro_version, - HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port, incarnation, - (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); - - if (active) { - /* additional routes after interface exchange? */ - ksocknal_create_routes(peer, conn->ksnc_port, - hello->kshm_ips, hello->kshm_nips); - } else { - hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, - hello->kshm_nips); - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - } - - LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - - /* setup the socket AFTER I've received hello (it disables - * SO_LINGER). I might call back to the acceptor who may want - * to send a protocol version response and then close the - * socket; this ensures the socket only tears down after the - * response has been sent. */ - if (rc == 0) - rc = ksocknal_lib_setup_sock(sock); - - write_lock_bh(global_lock); - - /* NB my callbacks block while I hold ksnd_global_lock */ - ksocknal_lib_set_callback(sock, conn); - - if (!active) - peer->ksnp_accepting--; - - write_unlock_bh(global_lock); - - if (rc != 0) { - write_lock_bh(global_lock); - ksocknal_close_conn_locked(conn, rc); - write_unlock_bh(global_lock); - } else if (ksocknal_connsock_addref(conn) == 0) { - /* Allow I/O to proceed. 
*/ - ksocknal_read_callback(conn); - ksocknal_write_callback(conn); - ksocknal_connsock_decref(conn); - } - - ksocknal_connsock_decref(conn); - ksocknal_conn_decref(conn); - return rc; - - failed_2: - if (!peer->ksnp_closing && - list_empty (&peer->ksnp_conns) && - list_empty (&peer->ksnp_routes)) { - list_add(&zombies, &peer->ksnp_tx_queue); - list_del_init(&peer->ksnp_tx_queue); - ksocknal_unlink_peer_locked(peer); - } - - write_unlock_bh (global_lock); - - if (warn != NULL) { - if (rc < 0) - CERROR("Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - else - CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - } - - if (!active) { - if (rc > 0) { - /* Request retry by replying with CONN_NONE - * ksnc_proto has been set already */ - conn->ksnc_type = SOCKLND_CONN_NONE; - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); - } - - write_lock_bh(global_lock); - peer->ksnp_accepting--; - write_unlock_bh(global_lock); - } - - ksocknal_txlist_done(ni, &zombies, 1); - ksocknal_peer_decref(peer); - - failed_1: - if (hello != NULL) - LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - - LIBCFS_FREE (conn, sizeof(*conn)); - - failed_0: - libcfs_sock_release(sock); - return rc; -} - -void -ksocknal_close_conn_locked (ksock_conn_t *conn, int error) -{ - /* This just does the immmediate housekeeping, and queues the - * connection for the reaper to terminate. - * Caller holds ksnd_global_lock exclusively in irq context */ - ksock_peer_t *peer = conn->ksnc_peer; - ksock_route_t *route; - ksock_conn_t *conn2; - struct list_head *tmp; - - LASSERT (peer->ksnp_error == 0); - LASSERT (!conn->ksnc_closing); - conn->ksnc_closing = 1; - - /* ksnd_deathrow_conns takes over peer's ref */ - list_del (&conn->ksnc_list); - - route = conn->ksnc_route; - if (route != NULL) { - /* dissociate conn from route... 
*/ - LASSERT (!route->ksnr_deleted); - LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); - - conn2 = NULL; - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn2->ksnc_route == route && - conn2->ksnc_type == conn->ksnc_type) - break; - - conn2 = NULL; - } - if (conn2 == NULL) - route->ksnr_connected &= ~(1 << conn->ksnc_type); - - conn->ksnc_route = NULL; - -#if 0 /* irrelevent with only eager routes */ - list_del (&route->ksnr_list); /* make route least favourite */ - list_add_tail (&route->ksnr_list, &peer->ksnp_routes); -#endif - ksocknal_route_decref(route); /* drop conn's ref on route */ - } - - if (list_empty (&peer->ksnp_conns)) { - /* No more connections to this peer */ - - peer->ksnp_proto = NULL; /* renegotiate protocol version */ - peer->ksnp_error = error; /* stash last conn close reason */ - - if (list_empty (&peer->ksnp_routes)) { - /* I've just closed last conn belonging to a - * peer with no routes to it */ - ksocknal_unlink_peer_locked (peer); - } - } - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns); - cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_peer_failed (ksock_peer_t *peer) -{ - time_t last_alive = 0; - int notify = 0; - - /* There has been a connection failure or comms error; but I'll only - * tell LNET I think the peer is dead if it's to another kernel and - * there are no connections or connection attempts in existance. 
*/ - - read_lock (&ksocknal_data.ksnd_global_lock); - - if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 && - list_empty(&peer->ksnp_conns) && - peer->ksnp_accepting == 0 && - ksocknal_find_connecting_route_locked(peer) == NULL) { - notify = 1; - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ksnp_last_alive); - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - - if (notify) - lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, - last_alive); -} - -void -ksocknal_terminate_conn (ksock_conn_t *conn) -{ - /* This gets called by the reaper (guaranteed thread context) to - * disengage the socket from its callbacks and close it. - * ksnc_refcount will eventually hit zero, and then the reaper will - * destroy it. */ - ksock_peer_t *peer = conn->ksnc_peer; - ksock_sched_t *sched = conn->ksnc_scheduler; - int failed = 0; - struct list_head *tmp; - struct list_head *nxt; - ksock_tx_t *tx; - LIST_HEAD (zlist); - - LASSERT(conn->ksnc_closing); - - /* wake up the scheduler to "send" all remaining packets to /dev/null */ - spin_lock_bh (&sched->kss_lock); - - /* a closing conn is always ready to tx */ - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && - !list_empty(&conn->ksnc_tx_queue)){ - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - cfs_waitq_signal (&sched->kss_waitq); - } - - spin_unlock_bh (&sched->kss_lock); - - spin_lock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - if (tx->tx_conn != conn) - continue; - - LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0); - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - } - - spin_unlock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &zlist) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - list_del(&tx->tx_zc_list); - 
ksocknal_tx_decref(tx); - } - - /* serialise with callbacks */ - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - ksocknal_lib_reset_callback(conn->ksnc_sock, conn); - - /* OK, so this conn may not be completely disengaged from its - * scheduler yet, but it _has_ committed to terminate... */ - conn->ksnc_scheduler->kss_nconns--; - - if (peer->ksnp_error != 0) { - /* peer's last conn closed in error */ - LASSERT (list_empty (&peer->ksnp_conns)); - failed = 1; - peer->ksnp_error = 0; /* avoid multiple notifications */ - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - if (failed) - ksocknal_peer_failed(peer); - - /* The socket is closed on the final put; either here, or in - * ksocknal_{send,recv}msg(). Since we set up the linger2 option - * when the connection was established, this will close the socket - * immediately, aborting anything buffered in it. Any hung - * zero-copy transmits will therefore complete in finite time. */ - ksocknal_connsock_decref(conn); -} - -void -ksocknal_queue_zombie_conn (ksock_conn_t *conn) -{ - /* Queue the conn for the reaper to destroy */ - - LASSERT (atomic_read(&conn->ksnc_conn_refcount) == 0); - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); - cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_destroy_conn (ksock_conn_t *conn) -{ - /* Final coup-de-grace of the reaper */ - CDEBUG (D_NET, "connection %p\n", conn); - - LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0); - LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0); - LASSERT (conn->ksnc_sock == NULL); - LASSERT (conn->ksnc_route == NULL); - LASSERT (!conn->ksnc_tx_scheduled); - LASSERT (!conn->ksnc_rx_scheduled); - LASSERT (list_empty(&conn->ksnc_tx_queue)); - - /* complete current receive if any */ - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_LNET_PAYLOAD: - CERROR("Completing partial receive from 
%s" - ", ip %d.%d.%d.%d:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - lnet_finalize (conn->ksnc_peer->ksnp_ni, - conn->ksnc_cookie, -EIO); - break; - case SOCKNAL_RX_LNET_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of lnet header from %s" - ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of ksock message from %s" - ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_SLOP: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of slops from %s" - ", ip %d.%d.%d.%d:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - break; - default: - LBUG (); - break; - } - - ksocknal_peer_decref(conn->ksnc_peer); - - LIBCFS_FREE (conn, sizeof (*conn)); -} - -int -ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) -{ - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - - if (ipaddr == 0 || - conn->ksnc_ipaddr == ipaddr) { - count++; - ksocknal_close_conn_locked (conn, why); - } - } - - return (count); -} - -int -ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) -{ - ksock_peer_t *peer = conn->ksnc_peer; - __u32 ipaddr = conn->ksnc_ipaddr; - int count; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (count); -} - -int 
-ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr) -{ - ksock_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) - lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; - else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid))) - continue; - - count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0); - } - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) - return (0); - - return (count == 0 ? -ENOENT : 0); -} - -void -ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive) -{ - /* The router is telling me she's been notified of a change in - * gateway state.... */ - lnet_process_id_t id = {.nid = gw_nid, .pid = LNET_PID_ANY}; - - CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), - alive ? "up" : "down"); - - if (!alive) { - /* If the gateway crashed, close all open connections... */ - ksocknal_close_matching_conns (id, 0); - return; - } - - /* ...otherwise do nothing. We can only establish new connections - * if we have autroutes, and these connect on demand. 
*/ -} - -void -ksocknal_push_peer (ksock_peer_t *peer) -{ - int index; - int i; - struct list_head *tmp; - ksock_conn_t *conn; - - for (index = 0; ; index++) { - read_lock (&ksocknal_data.ksnd_global_lock); - - i = 0; - conn = NULL; - - list_for_each (tmp, &peer->ksnp_conns) { - if (i++ == index) { - conn = list_entry (tmp, ksock_conn_t, ksnc_list); - ksocknal_conn_addref(conn); - break; - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - - if (conn == NULL) - break; - - ksocknal_lib_push_conn (conn); - ksocknal_conn_decref(conn); - } -} - -int -ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id) -{ - ksock_peer_t *peer; - struct list_head *tmp; - int index; - int i; - int j; - int rc = -ENOENT; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - for (j = 0; ; j++) { - read_lock (&ksocknal_data.ksnd_global_lock); - - index = 0; - peer = NULL; - - list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(tmp, ksock_peer_t, - ksnp_list); - - if (!((id.nid == LNET_NID_ANY || - id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer->ksnp_id.pid))) { - peer = NULL; - continue; - } - - if (index++ == j) { - ksocknal_peer_addref(peer); - break; - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - - if (peer != NULL) { - rc = 0; - ksocknal_push_peer (peer); - ksocknal_peer_decref(peer); - } - } - - } - - return (rc); -} - -int -ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask) -{ - ksock_net_t *net = ni->ni_data; - ksock_interface_t *iface; - int rc; - int i; - int j; - struct list_head *ptmp; - ksock_peer_t *peer; - struct list_head *rtmp; - ksock_route_t *route; - - if (ipaddress == 0 || - netmask == 0) - return (-EINVAL); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - iface = ksocknal_ip2iface(ni, ipaddress); - if (iface != NULL) { - /* silently ignore dups */ - rc = 0; - } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) { - rc = -ENOSPC; - } else { - iface = 
&net->ksnn_interfaces[net->ksnn_ninterfaces++]; - - iface->ksni_ipaddr = ipaddress; - iface->ksni_netmask = netmask; - iface->ksni_nroutes = 0; - iface->ksni_npeers = 0; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, ksock_peer_t, ksnp_list); - - for (j = 0; j < peer->ksnp_n_passive_ips; j++) - if (peer->ksnp_passive_ips[j] == ipaddress) - iface->ksni_npeers++; - - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr == ipaddress) - iface->ksni_nroutes++; - } - } - } - - rc = 0; - /* NB only new connections will pay attention to the new interface! */ - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (rc); -} - -void -ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) -{ - struct list_head *tmp; - struct list_head *nxt; - ksock_route_t *route; - ksock_conn_t *conn; - int i; - int j; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) - if (peer->ksnp_passive_ips[i] == ipaddr) { - for (j = i+1; j < peer->ksnp_n_passive_ips; j++) - peer->ksnp_passive_ips[j-1] = - peer->ksnp_passive_ips[j]; - peer->ksnp_n_passive_ips--; - break; - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr != ipaddr) - continue; - - if (route->ksnr_share_count != 0) { - /* Manually created; keep, but unbind */ - route->ksnr_myipaddr = 0; - } else { - ksocknal_del_route_locked(route); - } - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_myipaddr == ipaddr) - ksocknal_close_conn_locked (conn, 0); - } -} - -int -ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress) -{ - ksock_net_t *net = ni->ni_data; - int rc = -ENOENT; - struct list_head *tmp; - struct list_head *nxt; - ksock_peer_t *peer; - __u32 this_ip; - int i; - int j; - - 
write_lock_bh (&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - - if (!(ipaddress == 0 || - ipaddress == this_ip)) - continue; - - rc = 0; - - for (j = i+1; j < net->ksnn_ninterfaces; j++) - net->ksnn_interfaces[j-1] = - net->ksnn_interfaces[j]; - - net->ksnn_ninterfaces--; - - for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { - list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) { - peer = list_entry(tmp, ksock_peer_t, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - ksocknal_peer_del_interface_locked(peer, this_ip); - } - } - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (rc); -} - -int -ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc; - - switch(cmd) { - case IOC_LIBCFS_GET_INTERFACE: { - ksock_net_t *net = ni->ni_data; - ksock_interface_t *iface; - - read_lock (&ksocknal_data.ksnd_global_lock); - - if (data->ioc_count < 0 || - data->ioc_count >= net->ksnn_ninterfaces) { - rc = -ENOENT; - } else { - rc = 0; - iface = &net->ksnn_interfaces[data->ioc_count]; - - data->ioc_u32[0] = iface->ksni_ipaddr; - data->ioc_u32[1] = iface->ksni_netmask; - data->ioc_u32[2] = iface->ksni_npeers; - data->ioc_u32[3] = iface->ksni_nroutes; - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return rc; - } - - case IOC_LIBCFS_ADD_INTERFACE: - return ksocknal_add_interface(ni, - data->ioc_u32[0], /* IP address */ - data->ioc_u32[1]); /* net mask */ - - case IOC_LIBCFS_DEL_INTERFACE: - return ksocknal_del_interface(ni, - data->ioc_u32[0]); /* IP address */ - - case IOC_LIBCFS_GET_PEER: { - lnet_process_id_t id = {0,}; - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; - - rc = ksocknal_get_peer_info(ni, data->ioc_count, - &id, &myip, &ip, &port, - &conn_count, &share_count); - if (rc != 0) - return rc; - - data->ioc_nid = id.nid; - data->ioc_count 
= share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - data->ioc_u32[2] = myip; - data->ioc_u32[3] = conn_count; - data->ioc_u32[4] = id.pid; - return 0; - } - - case IOC_LIBCFS_ADD_PEER: { - lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LUSTRE_SRV_LNET_PID}; - return ksocknal_add_peer (ni, id, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - } - case IOC_LIBCFS_DEL_PEER: { - lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LNET_PID_ANY}; - return ksocknal_del_peer (ni, id, - data->ioc_u32[0]); /* IP */ - } - case IOC_LIBCFS_GET_CONN: { - int txmem; - int rxmem; - int nagle; - ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count); - - if (conn == NULL) - return -ENOENT; - - ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - - data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; - data->ioc_flags = nagle; - data->ioc_u32[0] = conn->ksnc_ipaddr; - data->ioc_u32[1] = conn->ksnc_port; - data->ioc_u32[2] = conn->ksnc_myipaddr; - data->ioc_u32[3] = conn->ksnc_type; - data->ioc_u32[4] = conn->ksnc_scheduler - - ksocknal_data.ksnd_schedulers; - data->ioc_u32[5] = rxmem; - data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; - ksocknal_conn_decref(conn); - return 0; - } - - case IOC_LIBCFS_CLOSE_CONNECTION: { - lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LNET_PID_ANY}; - - return ksocknal_close_matching_conns (id, - data->ioc_u32[0]); - } - case IOC_LIBCFS_REGISTER_MYNID: - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) - return 0; - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - - case IOC_LIBCFS_PUSH_CONNECTION: { - lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LNET_PID_ANY}; - - return ksocknal_push(ni, id); - } - default: - return -EINVAL; - } - /* not reached */ -} - -void -ksocknal_free_buffers (void) -{ - LASSERT 
(atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0); - - if (ksocknal_data.ksnd_schedulers != NULL) - LIBCFS_FREE (ksocknal_data.ksnd_schedulers, - sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers); - - LIBCFS_FREE (ksocknal_data.ksnd_peers, - sizeof (struct list_head) * - ksocknal_data.ksnd_peer_hash_size); - - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - struct list_head zlist; - ksock_tx_t *tx; - - list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); - list_del_init(&ksocknal_data.ksnd_idle_noop_txs); - spin_unlock(&ksocknal_data.ksnd_tx_lock); - - while(!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_list); - list_del(&tx->tx_list); - LIBCFS_FREE(tx, tx->tx_desc_size); - } - } else { - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } -} - -void -ksocknal_base_shutdown (void) -{ - ksock_sched_t *sched; - int i; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - LASSERT (ksocknal_data.ksnd_nnets == 0); - - switch (ksocknal_data.ksnd_init) { - default: - LASSERT (0); - - case SOCKNAL_INIT_ALL: - case SOCKNAL_INIT_DATA: - LASSERT (ksocknal_data.ksnd_peers != NULL); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - LASSERT (list_empty (&ksocknal_data.ksnd_peers[i])); - } - LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns)); - LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns)); - LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs)); - LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes)); - - if (ksocknal_data.ksnd_schedulers != NULL) - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - ksock_sched_t *kss = - &ksocknal_data.ksnd_schedulers[i]; - - LASSERT (list_empty (&kss->kss_tx_conns)); - LASSERT (list_empty (&kss->kss_rx_conns)); - LASSERT (list_empty (&kss->kss_zombie_noop_txs)); - LASSERT (kss->kss_nconns == 0); - } - - /* flag threads to terminate; wake and wait for them to die */ - 
ksocknal_data.ksnd_shuttingdown = 1; - cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq); - cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq); - - if (ksocknal_data.ksnd_schedulers != NULL) - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - sched = &ksocknal_data.ksnd_schedulers[i]; - cfs_waitq_broadcast(&sched->kss_waitq); - } - - i = 4; - read_lock (&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "waiting for %d threads to terminate\n", - ksocknal_data.ksnd_nthreads); - read_unlock (&ksocknal_data.ksnd_global_lock); - cfs_pause(cfs_time_seconds(1)); - read_lock (&ksocknal_data.ksnd_global_lock); - } - read_unlock (&ksocknal_data.ksnd_global_lock); - - ksocknal_free_buffers(); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; - break; - } - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - PORTAL_MODULE_UNUSE; -} - - -__u64 -ksocknal_new_incarnation (void) -{ - struct timeval tv; - - /* The incarnation number is the time this module loaded and it - * identifies this particular instance of the socknal. 
Hopefully - * we won't be able to reboot more frequently than 1MHz for the - * forseeable future :) */ - - do_gettimeofday(&tv); - - return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; -} - -int -ksocknal_base_startup (void) -{ - int rc; - int i; - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT (ksocknal_data.ksnd_nnets == 0); - - memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - - ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (ksocknal_data.ksnd_peers, - sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); - if (ksocknal_data.ksnd_peers == NULL) - return -ENOMEM; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); - - rwlock_init(&ksocknal_data.ksnd_global_lock); - - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); - cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq); - - spin_lock_init (&ksocknal_data.ksnd_connd_lock); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes); - cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq); - - spin_lock_init (&ksocknal_data.ksnd_tx_lock); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs); - - /* NB memset above zeros whole of ksocknal_data, including - * ksocknal_data.ksnd_irqinfo[all].ksni_valid */ - - /* flag lists/ptrs/locks initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - PORTAL_MODULE_USE; - - ksocknal_data.ksnd_nschedulers = ksocknal_nsched(); - LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers, - sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers); - if (ksocknal_data.ksnd_schedulers == NULL) - goto failed; - - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i]; - - 
spin_lock_init (&kss->kss_lock); - CFS_INIT_LIST_HEAD (&kss->kss_rx_conns); - CFS_INIT_LIST_HEAD (&kss->kss_tx_conns); - CFS_INIT_LIST_HEAD (&kss->kss_zombie_noop_txs); - cfs_waitq_init (&kss->kss_waitq); - } - - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - rc = ksocknal_thread_start (ksocknal_scheduler, - &ksocknal_data.ksnd_schedulers[i]); - if (rc != 0) { - CERROR("Can't spawn socknal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - /* must have at least 2 connds to remain responsive to accepts while - * connecting */ - if (*ksocknal_tunables.ksnd_nconnds < 2) - *ksocknal_tunables.ksnd_nconnds = 2; - - for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) { - rc = ksocknal_thread_start (ksocknal_connd, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn socknal connd: %d\n", rc); - goto failed; - } - } - - rc = ksocknal_thread_start (ksocknal_reaper, NULL); - if (rc != 0) { - CERROR ("Can't spawn socknal reaper: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; - - return 0; - - failed: - ksocknal_base_shutdown(); - return -ENETDOWN; -} - -void -ksocknal_debug_peerhash (lnet_ni_t *ni) -{ - ksock_peer_t *peer = NULL; - struct list_head *tmp; - int i; - - read_lock (&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (tmp, ksock_peer_t, ksnp_list); - - if (peer->ksnp_ni == ni) break; - - peer = NULL; - } - } - - if (peer != NULL) { - ksock_route_t *route; - ksock_conn_t *conn; - - CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, " - "closing %d, accepting %d, err %d, zcookie "LPU64", " - "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id), - atomic_read(&peer->ksnp_refcount), - peer->ksnp_sharecount, peer->ksnp_closing, - peer->ksnp_accepting, peer->ksnp_error, - peer->ksnp_zc_next_cookie, - !list_empty(&peer->ksnp_tx_queue), - 
!list_empty(&peer->ksnp_zc_req_list)); - - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, " - "del %d\n", atomic_read(&route->ksnr_refcount), - route->ksnr_scheduled, route->ksnr_connecting, - route->ksnr_connected, route->ksnr_deleted); - } - - list_for_each (tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - CWARN ("Conn: ref %d, sref %d, t %d, c %d\n", - atomic_read(&conn->ksnc_conn_refcount), - atomic_read(&conn->ksnc_sock_refcount), - conn->ksnc_type, conn->ksnc_closing); - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return; -} - -void -ksocknal_shutdown (lnet_ni_t *ni) -{ - ksock_net_t *net = ni->ni_data; - int i; - lnet_process_id_t anyid = {.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}; - - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); - LASSERT(ksocknal_data.ksnd_nnets > 0); - - spin_lock_bh (&net->ksnn_lock); - net->ksnn_shutdown = 1; /* prevent new peers */ - spin_unlock_bh (&net->ksnn_lock); - - /* Delete all peers */ - ksocknal_del_peer(ni, anyid, 0); - - /* Wait for all peer state to clean up */ - i = 2; - spin_lock_bh (&net->ksnn_lock); - while (net->ksnn_npeers != 0) { - spin_unlock_bh (&net->ksnn_lock); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d peers to disconnect\n", - net->ksnn_npeers); - cfs_pause(cfs_time_seconds(1)); - - ksocknal_debug_peerhash(ni); - - spin_lock_bh (&net->ksnn_lock); - } - spin_unlock_bh (&net->ksnn_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0); - LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0); - } - - LIBCFS_FREE(net, sizeof(*net)); - - ksocknal_data.ksnd_nnets--; - if (ksocknal_data.ksnd_nnets == 0) - ksocknal_base_shutdown(); -} - -int -ksocknal_enumerate_interfaces(ksock_net_t *net) -{ - char **names; - int i; - int j; - int rc; - int n; - - n = libcfs_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Can't enumerate interfaces: %d\n", n); - return n; - } - - for (i = j = 0; i < n; i++) { - int up; - __u32 ip; - __u32 mask; - - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = libcfs_ipif_query(names[i], &up, &ip, &mask); - if (rc != 0) { - CWARN("Can't get interface %s info: %d\n", - names[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s (down)\n", - names[i]); - continue; - } - - if (j == LNET_MAX_INTERFACES) { - CWARN("Ignoring interface %s (too many interfaces)\n", - names[i]); - continue; - } - - net->ksnn_interfaces[j].ksni_ipaddr = ip; - net->ksnn_interfaces[j].ksni_netmask = mask; - j++; - } - - libcfs_ipif_free_enumeration(names, n); - - if (j == 0) - CERROR("Can't find any usable interfaces\n"); - - return j; -} - -int -ksocknal_startup (lnet_ni_t *ni) -{ - ksock_net_t *net; - int rc; - int i; - - LASSERT (ni->ni_lnd == &the_ksocklnd); - - if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { - rc = ksocknal_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - if (net == NULL) - goto fail_0; - - memset(net, 0, sizeof(*net)); - spin_lock_init(&net->ksnn_lock); - net->ksnn_incarnation = ksocknal_new_incarnation(); - ni->ni_data = net; - ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; - 
ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits; - - if (ni->ni_interfaces[0] == NULL) { - rc = ksocknal_enumerate_interfaces(net); - if (rc <= 0) - goto fail_1; - - net->ksnn_ninterfaces = 1; - } else { - for (i = 0; i < LNET_MAX_INTERFACES; i++) { - int up; - - if (ni->ni_interfaces[i] == NULL) - break; - - rc = libcfs_ipif_query( - ni->ni_interfaces[i], &up, - &net->ksnn_interfaces[i].ksni_ipaddr, - &net->ksnn_interfaces[i].ksni_netmask); - - if (rc != 0) { - CERROR("Can't get interface %s info: %d\n", - ni->ni_interfaces[i], rc); - goto fail_1; - } - - if (!up) { - CERROR("Interface %s is down\n", - ni->ni_interfaces[i]); - goto fail_1; - } - } - net->ksnn_ninterfaces = i; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - net->ksnn_interfaces[0].ksni_ipaddr); - - ksocknal_data.ksnd_nnets++; - - return 0; - - fail_1: - LIBCFS_FREE(net, sizeof(*net)); - fail_0: - if (ksocknal_data.ksnd_nnets == 0) - ksocknal_base_shutdown(); - - return -ENETDOWN; -} - - -void __exit -ksocknal_module_fini (void) -{ - lnet_unregister_lnd(&the_ksocklnd); - ksocknal_lib_tunables_fini(); -} - -int __init -ksocknal_module_init (void) -{ - int rc; - - /* check ksnr_connected/connecting field large enough */ - CLASSERT(SOCKLND_CONN_NTYPES <= 4); - - rc = ksocknal_lib_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_ksocklnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel TCP Socket LND v2.0.0"); -MODULE_LICENSE("GPL"); - -cfs_module(ksocknal, "2.0.0", ksocknal_module_init, ksocknal_module_fini); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h deleted file mode 100644 index 0835a34e015b5eded97fa754b2bb7de0063f6777..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd.h +++ /dev/null @@ -1,550 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_PORTAL_ALLOC -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#define DEBUG_SUBSYSTEM S_LND - -#if defined(__linux__) -#include "socklnd_lib-linux.h" -#elif defined(__APPLE__) -#include "socklnd_lib-darwin.h" -#elif defined(__WINNT__) -#include "socklnd_lib-winnt.h" -#else -#error Unsupported Operating System -#endif - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> -#include <lnet/socklnd.h> - -#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */ - -#define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */ - -#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */ -#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */ - -#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */ - -/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled). - * no risk if we're not running on a CONFIG_HIGHMEM platform. */ -#ifdef CONFIG_HIGHMEM -# define SOCKNAL_RISK_KMAP_DEADLOCK 0 -#else -# define SOCKNAL_RISK_KMAP_DEADLOCK 1 -#endif - -typedef struct /* per scheduler state */ -{ - spinlock_t kss_lock; /* serialise */ - struct list_head kss_rx_conns; /* conn waiting to be read */ - struct list_head kss_tx_conns; /* conn waiting to be written */ - struct list_head kss_zombie_noop_txs; /* zombie noop tx list */ - cfs_waitq_t kss_waitq; /* where scheduler sleeps */ - int kss_nconns; /* # connections assigned to this scheduler */ -} ksock_sched_t; - -typedef struct -{ - unsigned int ksni_valid:1; /* been set yet? */ - unsigned int ksni_bound:1; /* bound to a cpu yet? 
*/ - unsigned int ksni_sched:6; /* which scheduler (assumes < 64) */ -} ksock_irqinfo_t; - -typedef struct /* in-use interface */ -{ - __u32 ksni_ipaddr; /* interface's IP address */ - __u32 ksni_netmask; /* interface's network mask */ - int ksni_nroutes; /* # routes using (active) */ - int ksni_npeers; /* # peers using (passive) */ - char ksni_name[16]; /* interface name */ -} ksock_interface_t; - -typedef struct -{ - int *ksnd_timeout; /* "stuck" socket timeout (seconds) */ - int *ksnd_nconnds; /* # connection daemons */ - int *ksnd_min_reconnectms; /* first connection retry after (ms)... */ - int *ksnd_max_reconnectms; /* ...exponentially increasing to this */ - int *ksnd_eager_ack; /* make TCP ack eagerly? */ - int *ksnd_typed_conns; /* drive sockets by type? */ - int *ksnd_min_bulk; /* smallest "large" message */ - int *ksnd_tx_buffer_size; /* socket tx buffer size */ - int *ksnd_rx_buffer_size; /* socket rx buffer size */ - int *ksnd_nagle; /* enable NAGLE? */ - int *ksnd_keepalive_idle; /* # idle secs before 1st probe */ - int *ksnd_keepalive_count; /* # probes */ - int *ksnd_keepalive_intvl; /* time between probes */ - int *ksnd_credits; /* # concurrent sends */ - int *ksnd_peercredits; /* # concurrent sends to 1 peer */ - int *ksnd_enable_csum; /* enable check sum */ - int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */ - unsigned int *ksnd_zc_min_frag; /* minimum zero copy frag size */ -#ifdef CPU_AFFINITY - int *ksnd_irq_affinity; /* enable IRQ affinity? 
*/ -#endif -#ifdef SOCKNAL_BACKOFF - int *ksnd_backoff_init; /* initial TCP backoff */ - int *ksnd_backoff_max; /* maximum TCP backoff */ -#endif -#if SOCKNAL_VERSION_DEBUG - int *ksnd_protocol; /* protocol version */ -#endif -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */ -#endif -} ksock_tunables_t; - -typedef struct -{ - __u64 ksnn_incarnation; /* my epoch */ - spinlock_t ksnn_lock; /* serialise */ - int ksnn_npeers; /* # peers */ - int ksnn_shutdown; /* shutting down? */ - int ksnn_ninterfaces; /* IP interfaces */ - ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES]; -} ksock_net_t; - -typedef struct -{ - int ksnd_init; /* initialisation state */ - int ksnd_nnets; /* # networks set up */ - - rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ - struct list_head *ksnd_peers; /* hash table of all my known peers */ - int ksnd_peer_hash_size; /* size of ksnd_peers */ - - int ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit */ - int ksnd_nschedulers; /* # schedulers */ - ksock_sched_t *ksnd_schedulers; /* their state */ - - atomic_t ksnd_nactive_txs; /* #active txs */ - - struct list_head ksnd_deathrow_conns; /* conns to close: reaper_lock*/ - struct list_head ksnd_zombie_conns; /* conns to free: reaper_lock */ - struct list_head ksnd_enomem_conns; /* conns to retry: reaper_lock*/ - cfs_waitq_t ksnd_reaper_waitq; /* reaper sleeps here */ - cfs_time_t ksnd_reaper_waketime; /* when reaper will wake */ - spinlock_t ksnd_reaper_lock; /* serialise */ - - int ksnd_enomem_tx; /* test ENOMEM sender */ - int ksnd_stall_tx; /* test sluggish sender */ - int ksnd_stall_rx; /* test sluggish receiver */ - - struct list_head ksnd_connd_connreqs; /* incoming connection requests */ - struct list_head ksnd_connd_routes; /* routes waiting to be connected */ - cfs_waitq_t ksnd_connd_waitq; /* connds sleep here */ - int ksnd_connd_connecting;/* # connds connecting */ - 
spinlock_t ksnd_connd_lock; /* serialise */ - - struct list_head ksnd_idle_noop_txs; /* list head for freed noop tx */ - spinlock_t ksnd_tx_lock; /* serialise, NOT safe in g_lock */ - - ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */ - -} ksock_nal_data_t; - -#define SOCKNAL_INIT_NOTHING 0 -#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_ALL 2 - -/* A packet just assembled for transmission is represented by 1 or more - * struct iovec fragments (the first frag contains the portals header), - * followed by 0 or more lnet_kiov_t fragments. - * - * On the receive side, initially 1 struct iovec fragment is posted for - * receive (the header). Once the header has been received, the payload is - * received into either struct iovec or lnet_kiov_t fragments, depending on - * what the header matched or whether the message needs forwarding. */ - -struct ksock_conn; /* forward ref */ -struct ksock_peer; /* forward ref */ -struct ksock_route; /* forward ref */ -struct ksock_proto; /* forward ref */ - -typedef struct /* transmit packet */ -{ - struct list_head tx_list; /* queue on conn for transmission etc */ - struct list_head tx_zc_list; /* queue on peer for ZC request */ - atomic_t tx_refcount; /* tx reference count */ - int tx_nob; /* # packet bytes */ - int tx_resid; /* residual bytes */ - int tx_niov; /* # packet iovec frags */ - struct iovec *tx_iov; /* packet iovec frags */ - int tx_nkiov; /* # packet page frags */ - unsigned int tx_checked_zc; /* Have I checked if I should ZC? 
*/ - lnet_kiov_t *tx_kiov; /* packet page frags */ - struct ksock_conn *tx_conn; /* owning conn */ - lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */ - ksock_msg_t tx_msg; /* socklnd message buffer */ - int tx_desc_size; /* size of this descriptor */ - union { - struct { - struct iovec iov; /* virt hdr */ - lnet_kiov_t kiov[0]; /* paged payload */ - } paged; - struct { - struct iovec iov[1]; /* virt hdr + payload */ - } virt; - } tx_frags; -} ksock_tx_t; - -#define KSOCK_NOOP_TX_SIZE offsetof(ksock_tx_t, tx_frags.paged.kiov[0]) - -/* network zero copy callback descriptor embedded in ksock_tx_t */ - -/* space for the rx frag descriptors; we either read a single contiguous - * header, or up to LNET_MAX_IOV frags of payload of either type. */ -typedef union { - struct iovec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; -} ksock_rxiovspace_t; - -#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */ -#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */ -#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */ -#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */ -#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */ -#define SOCKNAL_RX_SLOP 6 /* skipping body */ - -typedef struct ksock_conn -{ - struct ksock_peer *ksnc_peer; /* owning peer */ - struct ksock_route *ksnc_route; /* owning route */ - struct list_head ksnc_list; /* stash on peer's conn list */ - cfs_socket_t *ksnc_sock; /* actual socket */ - void *ksnc_saved_data_ready; /* socket's original data_ready() callback */ - void *ksnc_saved_write_space; /* socket's original write_space() callback */ - atomic_t ksnc_conn_refcount; /* conn refcount */ - atomic_t ksnc_sock_refcount; /* sock refcount */ - ksock_sched_t *ksnc_scheduler; /* who schedules this connection */ - __u32 ksnc_myipaddr; /* my IP */ - __u32 ksnc_ipaddr; /* peer's IP */ - int ksnc_port; /* peer's port */ - int ksnc_type:3; /* type of connection, should be 
signed value */ - int ksnc_closing:1; /* being shut down */ - int ksnc_flip:1; /* flip or not, only for V2.x */ - int ksnc_zc_capable:1; /* enable to ZC */ - struct ksock_proto *ksnc_proto; /* protocol for the connection */ - - /* reader */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ - cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */ - __u8 ksnc_rx_started; /* started receiving a message */ - __u8 ksnc_rx_ready; /* data ready to read */ - __u8 ksnc_rx_scheduled; /* being progressed */ - __u8 ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted; /* bytes actually wanted */ - int ksnc_rx_niov; /* # iovec frags */ - struct iovec *ksnc_rx_iov; /* the iovec frags */ - int ksnc_rx_nkiov; /* # page frags */ - lnet_kiov_t *ksnc_rx_kiov; /* the page frags */ - ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */ - __u32 ksnc_rx_csum; /* partial checksum for incoming data */ - void *ksnc_cookie; /* rx lnet_finalize passthru arg */ - ksock_msg_t ksnc_msg; /* incoming message buffer: - * V2.x message takes the whole struct - * V1.x message is a bare lnet_hdr_t, it's stored - * in ksnc_msg.ksm_u.lnetmsg */ - - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output space */ - struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - ksock_tx_t *ksnc_tx_mono; /* V2.x only, next mono-packet, mono-packet is : - * a. lnet packet without piggyback - * b. 
noop ZC-ACK packet */ - cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */ - int ksnc_tx_bufnob; /* send buffer marker */ - atomic_t ksnc_tx_nob; /* # bytes queued */ - int ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - -#if !SOCKNAL_SINGLE_FRAG_RX - struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV]; -#endif -#if !SOCKNAL_SINGLE_FRAG_TX - struct iovec ksnc_tx_scratch_iov[LNET_MAX_IOV]; -#endif -} ksock_conn_t; - -typedef struct ksock_route -{ - struct list_head ksnr_list; /* chain on peer route list */ - struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */ - struct ksock_peer *ksnr_peer; /* owning peer */ - atomic_t ksnr_refcount; /* # users */ - cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */ - cfs_duration_t ksnr_retry_interval; /* how long between retries */ - __u32 ksnr_myipaddr; /* my IP */ - __u32 ksnr_ipaddr; /* IP address to connect to */ - int ksnr_port; /* port to connect to */ - unsigned int ksnr_scheduled:1; /* scheduled for attention */ - unsigned int ksnr_connecting:1; /* connection establishment in progress */ - unsigned int ksnr_connected:4; /* connections established by type */ - unsigned int ksnr_deleted:1; /* been removed from peer? */ - unsigned int ksnr_share_count; /* created explicitly? 
*/ - int ksnr_conn_count; /* # conns established by this route */ -} ksock_route_t; - -typedef struct ksock_peer -{ - struct list_head ksnp_list; /* stash on global peer list */ - lnet_process_id_t ksnp_id; /* who's on the other end(s) */ - atomic_t ksnp_refcount; /* # users */ - int ksnp_sharecount; /* lconf usage counter */ - int ksnp_closing; /* being closed */ - int ksnp_accepting; /* # passive connections pending */ - int ksnp_error; /* errno on closing last conn */ - __u64 ksnp_zc_next_cookie;/* ZC completion cookie */ - __u64 ksnp_incarnation; /* latest known peer incarnation */ - struct ksock_proto *ksnp_proto; /* latest known peer protocol */ - struct list_head ksnp_conns; /* all active connections */ - struct list_head ksnp_routes; /* routes */ - struct list_head ksnp_tx_queue; /* waiting packets */ - spinlock_t ksnp_lock; /* serialize, NOT safe in g_lock */ - struct list_head ksnp_zc_req_list; /* zero copy requests wait for ACK */ - cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */ - lnet_ni_t *ksnp_ni; /* which network */ - int ksnp_n_passive_ips; /* # of... 
*/ - __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */ -} ksock_peer_t; - -typedef struct ksock_connreq -{ - struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */ - lnet_ni_t *ksncr_ni; /* chosen NI */ - cfs_socket_t *ksncr_sock; /* accepted socket */ -} ksock_connreq_t; - -extern ksock_nal_data_t ksocknal_data; -extern ksock_tunables_t ksocknal_tunables; - -typedef struct ksock_proto -{ - int pro_version; /* version number of protocol */ - int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); /* handshake function */ - int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */ - void (*pro_pack)(ksock_tx_t *); /* message pack */ - void (*pro_unpack)(ksock_msg_t *); /* message unpack */ -} ksock_proto_t; - -extern ksock_proto_t ksocknal_protocol_v1x; -extern ksock_proto_t ksocknal_protocol_v2x; - -#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR -#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR -#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR - -#ifndef CPU_MASK_NONE -#define CPU_MASK_NONE 0UL -#endif - -static inline int -ksocknal_route_mask(void) -{ - if (!*ksocknal_tunables.ksnd_typed_conns) - return (1 << SOCKLND_CONN_ANY); - - return ((1 << SOCKLND_CONN_CONTROL) | - (1 << SOCKLND_CONN_BULK_IN) | - (1 << SOCKLND_CONN_BULK_OUT)); -} - -static inline struct list_head * -ksocknal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size; - - return (&ksocknal_data.ksnd_peers [hash]); -} - -static inline void -ksocknal_conn_addref (ksock_conn_t *conn) -{ - LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); - atomic_inc(&conn->ksnc_conn_refcount); -} - -extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn); - -static inline void -ksocknal_conn_decref (ksock_conn_t *conn) -{ - LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_conn_refcount)) - ksocknal_queue_zombie_conn(conn); 
-} - -static inline int -ksocknal_connsock_addref (ksock_conn_t *conn) -{ - int rc = -ESHUTDOWN; - - read_lock (&ksocknal_data.ksnd_global_lock); - if (!conn->ksnc_closing) { - LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0); - atomic_inc(&conn->ksnc_sock_refcount); - rc = 0; - } - read_unlock (&ksocknal_data.ksnd_global_lock); - - return (rc); -} - -static inline void -ksocknal_connsock_decref (ksock_conn_t *conn) -{ - LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) { - LASSERT (conn->ksnc_closing); - libcfs_sock_release(conn->ksnc_sock); - conn->ksnc_sock = NULL; - } -} - -static inline void -ksocknal_tx_addref (ksock_tx_t *tx) -{ - LASSERT (atomic_read(&tx->tx_refcount) > 0); - atomic_inc(&tx->tx_refcount); -} - -extern void ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx); - -static inline void -ksocknal_tx_decref (ksock_tx_t *tx) -{ - LASSERT (atomic_read(&tx->tx_refcount) > 0); - if (atomic_dec_and_test(&tx->tx_refcount)) - ksocknal_tx_done(NULL, tx); -} - -static inline void -ksocknal_route_addref (ksock_route_t *route) -{ - LASSERT (atomic_read(&route->ksnr_refcount) > 0); - atomic_inc(&route->ksnr_refcount); -} - -extern void ksocknal_destroy_route (ksock_route_t *route); - -static inline void -ksocknal_route_decref (ksock_route_t *route) -{ - LASSERT (atomic_read (&route->ksnr_refcount) > 0); - if (atomic_dec_and_test(&route->ksnr_refcount)) - ksocknal_destroy_route (route); -} - -static inline void -ksocknal_peer_addref (ksock_peer_t *peer) -{ - LASSERT (atomic_read (&peer->ksnp_refcount) > 0); - atomic_inc(&peer->ksnp_refcount); -} - -extern void ksocknal_destroy_peer (ksock_peer_t *peer); - -static inline void -ksocknal_peer_decref (ksock_peer_t *peer) -{ - LASSERT (atomic_read (&peer->ksnp_refcount) > 0); - if (atomic_dec_and_test(&peer->ksnp_refcount)) - ksocknal_destroy_peer (peer); -} - -int ksocknal_startup (lnet_ni_t *ni); -void ksocknal_shutdown (lnet_ni_t *ni); -int 
ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int ksocknal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int ksocknal_accept(lnet_ni_t *ni, cfs_socket_t *sock); - -extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port); -extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id); -extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id); -extern void ksocknal_peer_failed (ksock_peer_t *peer); -extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, - cfs_socket_t *sock, int type); -extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); -extern void ksocknal_terminate_conn (ksock_conn_t *conn); -extern void ksocknal_destroy_conn (ksock_conn_t *conn); -extern int ksocknal_close_peer_conns_locked (ksock_peer_t *peer, - __u32 ipaddr, int why); -extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); -extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr); - -extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn); -extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error); -extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive); -extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); -extern void ksocknal_thread_fini (void); -extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer); -extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); -extern int ksocknal_scheduler (void *arg); -extern int ksocknal_connd (void *arg); -extern int ksocknal_reaper (void *arg); -extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, - lnet_nid_t peer_nid, ksock_hello_msg_t *hello); -extern int 
ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, - ksock_hello_msg_t *hello, lnet_process_id_t *id, - __u64 *incarnation); -extern void ksocknal_read_callback(ksock_conn_t *conn); -extern void ksocknal_write_callback(ksock_conn_t *conn); - -extern int ksocknal_lib_zc_capable(cfs_socket_t *sock); -extern void ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn); -extern void ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn); -extern void ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn); -extern void ksocknal_lib_push_conn (ksock_conn_t *conn); -extern void ksocknal_lib_bind_irq (unsigned int irq); -extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn); -extern unsigned int ksocknal_lib_sock_irq (cfs_socket_t *sock); -extern int ksocknal_lib_setup_sock (cfs_socket_t *so); -extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx); -extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx); -extern void ksocknal_lib_eager_ack (ksock_conn_t *conn); -extern int ksocknal_lib_recv_iov (ksock_conn_t *conn); -extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn); -extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, - int *rxmem, int *nagle); - -extern int ksocknal_lib_tunables_init(void); -extern void ksocknal_lib_tunables_fini(void); - -extern void ksocknal_lib_csum_tx(ksock_tx_t *tx); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c deleted file mode 100644 index bf4cd0886bec4a05a6cd9d6cc5ec109815f4c8ed..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ /dev/null @@ -1,2810 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. 
Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "socklnd.h" - -ksock_tx_t * -ksocknal_alloc_tx (int size) -{ - ksock_tx_t *tx = NULL; - - if (size == KSOCK_NOOP_TX_SIZE) { - /* searching for a noop tx in free list */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next, - ksock_tx_t, tx_list); - LASSERT(tx->tx_desc_size == size); - list_del(&tx->tx_list); - } - - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } - - if (tx == NULL) - LIBCFS_ALLOC(tx, size); - - if (tx == NULL) - return NULL; - - atomic_set(&tx->tx_refcount, 1); - tx->tx_desc_size = size; - atomic_inc(&ksocknal_data.ksnd_nactive_txs); - - return tx; -} - -void -ksocknal_free_tx (ksock_tx_t *tx) -{ - atomic_dec(&ksocknal_data.ksnd_nactive_txs); - - if (tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) { - /* it's a noop tx */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs); - - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } else { - LIBCFS_FREE(tx, tx->tx_desc_size); - } -} - -void -ksocknal_init_msg(ksock_msg_t *msg, int type) -{ - msg->ksm_type = type; - 
msg->ksm_csum = 0; - msg->ksm_zc_req_cookie = 0; - msg->ksm_zc_ack_cookie = 0; -} - -int -ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct iovec *iov = tx->tx_iov; - int nob; - int rc; - - LASSERT (tx->tx_niov > 0); - - /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */ - rc = ksocknal_lib_send_iov(conn, tx); - - if (rc <= 0) /* sent nothing? */ - return (rc); - - nob = rc; - LASSERT (nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" iov */ - do { - LASSERT (tx->tx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); - iov->iov_len -= nob; - return (rc); - } - - nob -= iov->iov_len; - tx->tx_iov = ++iov; - tx->tx_niov--; - } while (nob != 0); - - return (rc); -} - -int -ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - lnet_kiov_t *kiov = tx->tx_kiov; - int nob; - int rc; - - LASSERT (tx->tx_niov == 0); - LASSERT (tx->tx_nkiov > 0); - - /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */ - rc = ksocknal_lib_send_kiov(conn, tx); - - if (rc <= 0) /* sent nothing? */ - return (rc); - - nob = rc; - LASSERT (nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" kiov */ - do { - LASSERT(tx->tx_nkiov > 0); - - if (nob < kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; - return rc; - } - - nob -= kiov->kiov_len; - tx->tx_kiov = ++kiov; - tx->tx_nkiov--; - } while (nob != 0); - - return (rc); -} - -int -ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) -{ - int rc; - int bufnob; - - if (ksocknal_data.ksnd_stall_tx != 0) { - cfs_pause(cfs_time_seconds(ksocknal_data.ksnd_stall_tx)); - } - - LASSERT (tx->tx_resid != 0); - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - return (-ESHUTDOWN); - } - - do { - if (ksocknal_data.ksnd_enomem_tx > 0) { - /* testing... 
*/ - ksocknal_data.ksnd_enomem_tx--; - rc = -EAGAIN; - } else if (tx->tx_niov != 0) { - rc = ksocknal_send_iov (conn, tx); - } else { - rc = ksocknal_send_kiov (conn, tx); - } - - bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock); - if (rc > 0) /* sent something? */ - conn->ksnc_tx_bufnob += rc; /* account it */ - - if (bufnob < conn->ksnc_tx_bufnob) { - /* allocated send buffer bytes < computed; infer - * something got ACKed */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_tx_bufnob = bufnob; - mb(); - } - - if (rc <= 0) { /* Didn't write anything? */ - ksock_sched_t *sched; - - if (rc == 0) /* some stacks return 0 instead of -EAGAIN */ - rc = -EAGAIN; - - if (rc != -EAGAIN) - break; - - /* Check if EAGAIN is due to memory pressure */ - - sched = conn->ksnc_scheduler; - spin_lock_bh (&sched->kss_lock); - - if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) && - !conn->ksnc_tx_ready) { - /* SOCK_NOSPACE is set when the socket fills - * and cleared in the write_space callback - * (which also sets ksnc_tx_ready). If - * SOCK_NOSPACE and ksnc_tx_ready are BOTH - * zero, I didn't fill the socket and - * write_space won't reschedule me, so I - * return -ENOMEM to get my caller to retry - * after a timeout */ - rc = -ENOMEM; - } - - spin_unlock_bh (&sched->kss_lock); - break; - } - - /* socket's wmem_queued now includes 'rc' bytes */ - atomic_sub (rc, &conn->ksnc_tx_nob); - rc = 0; - - } while (tx->tx_resid != 0); - - ksocknal_connsock_decref(conn); - return (rc); -} - -int -ksocknal_recv_iov (ksock_conn_t *conn) -{ - struct iovec *iov = conn->ksnc_rx_iov; - int nob; - int rc; - - LASSERT (conn->ksnc_rx_niov > 0); - - /* Never touch conn->ksnc_rx_iov or change connection - * status inside ksocknal_lib_recv_iov */ - rc = ksocknal_lib_recv_iov(conn); - - if (rc <= 0) - return (rc); - - /* received something... 
*/ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_rx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_wanted -= nob; - conn->ksnc_rx_nob_left -= nob; - - do { - LASSERT (conn->ksnc_rx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_len -= nob; - iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob); - return (-EAGAIN); - } - - nob -= iov->iov_len; - conn->ksnc_rx_iov = ++iov; - conn->ksnc_rx_niov--; - } while (nob != 0); - - return (rc); -} - -int -ksocknal_recv_kiov (ksock_conn_t *conn) -{ - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - int nob; - int rc; - LASSERT (conn->ksnc_rx_nkiov > 0); - - /* Never touch conn->ksnc_rx_kiov or change connection - * status inside ksocknal_lib_recv_iov */ - rc = ksocknal_lib_recv_kiov(conn); - - if (rc <= 0) - return (rc); - - /* received something... */ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_rx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_wanted -= nob; - conn->ksnc_rx_nob_left -= nob; - - do { - LASSERT (conn->ksnc_rx_nkiov > 0); - - if (nob < kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; - return -EAGAIN; - } - - nob -= kiov->kiov_len; - conn->ksnc_rx_kiov = ++kiov; - conn->ksnc_rx_nkiov--; - } while (nob != 0); - - return 1; -} - -int -ksocknal_receive (ksock_conn_t *conn) -{ - /* Return 1 on success, 0 on EOF, < 0 on error. - * Caller checks ksnc_rx_nob_wanted to determine - * progress/completion. 
*/ - int rc; - ENTRY; - - if (ksocknal_data.ksnd_stall_rx != 0) { - cfs_pause(cfs_time_seconds (ksocknal_data.ksnd_stall_rx)); - } - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - return (-ESHUTDOWN); - } - - for (;;) { - if (conn->ksnc_rx_niov != 0) - rc = ksocknal_recv_iov (conn); - else - rc = ksocknal_recv_kiov (conn); - - if (rc <= 0) { - /* error/EOF or partial receive */ - if (rc == -EAGAIN) { - rc = 1; - } else if (rc == 0 && conn->ksnc_rx_started) { - /* EOF in the middle of a message */ - rc = -EPROTO; - } - break; - } - - /* Completed a fragment */ - - if (conn->ksnc_rx_nob_wanted == 0) { - rc = 1; - break; - } - } - - ksocknal_connsock_decref(conn); - RETURN (rc); -} - -void -ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx) -{ - lnet_msg_t *lnetmsg = tx->tx_lnetmsg; - int rc = (tx->tx_resid == 0) ? 0 : -EIO; - ENTRY; - - LASSERT(ni != NULL || tx->tx_conn != NULL); - - if (tx->tx_conn != NULL) - ksocknal_conn_decref(tx->tx_conn); - - if (ni == NULL && tx->tx_conn != NULL) - ni = tx->tx_conn->ksnc_peer->ksnp_ni; - - ksocknal_free_tx (tx); - if (lnetmsg != NULL) /* KSOCK_MSG_NOOP go without lnetmsg */ - lnet_finalize (ni, lnetmsg, rc); - - EXIT; -} - -void -ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error) -{ - ksock_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, ksock_tx_t, tx_list); - - if (error && tx->tx_lnetmsg != NULL) { - CDEBUG (D_NETERROR, "Deleting packet type %d len %d %s->%s\n", - le32_to_cpu (tx->tx_lnetmsg->msg_hdr.type), - le32_to_cpu (tx->tx_lnetmsg->msg_hdr.payload_length), - libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)), - libcfs_nid2str(le64_to_cpu (tx->tx_lnetmsg->msg_hdr.dest_nid))); - } else if (error) { - CDEBUG (D_NETERROR, "Deleting noop packet\n"); - } - - list_del (&tx->tx_list); - - LASSERT (atomic_read(&tx->tx_refcount) == 1); - ksocknal_tx_done (ni, tx); - } -} - -static void -ksocknal_check_zc_req(ksock_tx_t *tx) 
-{ - ksock_conn_t *conn = tx->tx_conn; - ksock_peer_t *peer = conn->ksnc_peer; - lnet_kiov_t *kiov = tx->tx_kiov; - int nkiov = tx->tx_nkiov; - - /* Set tx_msg.ksm_zc_req_cookie to a unique non-zero cookie and add tx - * to ksnp_zc_req_list if some fragment of this message should be sent - * zero-copy. Our peer will send an ACK containing this cookie when - * she has received this message to tell us we can signal completion. - * tx_msg.ksm_zc_req_cookie remains non-zero while tx is on - * ksnp_zc_req_list. */ - - if (conn->ksnc_proto != &ksocknal_protocol_v2x || - !conn->ksnc_zc_capable) - return; - - while (nkiov > 0) { - if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag) - break; - --nkiov; - ++kiov; - } - - if (nkiov == 0) - return; - - /* assign cookie and queue tx to pending list, it will be released when - * a matching ack is received. See ksocknal_handle_zc_ack() */ - - ksocknal_tx_addref(tx); - - spin_lock(&peer->ksnp_lock); - - LASSERT (tx->tx_msg.ksm_zc_req_cookie == 0); - tx->tx_msg.ksm_zc_req_cookie = peer->ksnp_zc_next_cookie++; - list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list); - - spin_unlock(&peer->ksnp_lock); -} - -static void -ksocknal_unzc_req(ksock_tx_t *tx) -{ - ksock_peer_t *peer = tx->tx_conn->ksnc_peer; - - spin_lock(&peer->ksnp_lock); - - if (tx->tx_msg.ksm_zc_req_cookie == 0) { - /* Not waiting for an ACK */ - spin_unlock(&peer->ksnp_lock); - return; - } - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - - spin_unlock(&peer->ksnp_lock); - - ksocknal_tx_decref(tx); -} - -int -ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) -{ - int rc; - - if (!tx->tx_checked_zc) { - tx->tx_checked_zc = 1; - ksocknal_check_zc_req(tx); - } - - rc = ksocknal_transmit (conn, tx); - - CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); - - if (tx->tx_resid == 0) { - /* Sent everything OK */ - LASSERT (rc == 0); - - return (0); - } - - if (rc == -EAGAIN) - return (rc); - - if (rc == -ENOMEM) { - static int counter; 
- - counter++; /* exponential backoff warnings */ - if ((counter & (-counter)) == counter) - CWARN("%u ENOMEM tx %p (%u allocated)\n", - counter, conn, atomic_read(&libcfs_kmemory)); - - /* Queue on ksnd_enomem_conns for retry after a timeout */ - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - /* enomem list takes over scheduler's ref... */ - LASSERT (conn->ksnc_tx_scheduled); - list_add_tail(&conn->ksnc_tx_list, - &ksocknal_data.ksnd_enomem_conns); - if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(), - SOCKNAL_ENOMEM_RETRY), - ksocknal_data.ksnd_reaper_waketime)) - cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - return (rc); - } - - /* Actual error */ - LASSERT (rc < 0); - - if (!conn->ksnc_closing) { - switch (rc) { - case -ECONNRESET: - LCONSOLE_WARN("Host %u.%u.%u.%u reset our connection " - "while we were sending data; it may have " - "rebooted.\n", - HIPQUAD(conn->ksnc_ipaddr)); - break; - default: - LCONSOLE_WARN("There was an unexpected network error " - "while writing to %u.%u.%u.%u: %d.\n", - HIPQUAD(conn->ksnc_ipaddr), rc); - break; - } - CDEBUG(D_NET, "[%p] Error %d on write to %s" - " ip %d.%d.%d.%d:%d\n", conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - } - - ksocknal_unzc_req(tx); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings (conn, - (conn->ksnc_closing) ? 
0 : rc); - - return (rc); -} - -void -ksocknal_launch_connection_locked (ksock_route_t *route) -{ - - /* called holding write lock on ksnd_global_lock */ - - LASSERT (!route->ksnr_scheduled); - LASSERT (!route->ksnr_connecting); - LASSERT ((ksocknal_route_mask() & ~route->ksnr_connected) != 0); - - route->ksnr_scheduled = 1; /* scheduling conn for connd */ - ksocknal_route_addref(route); /* extra ref for connd */ - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - list_add_tail (&route->ksnr_connd_list, - &ksocknal_data.ksnd_connd_routes); - cfs_waitq_signal (&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); -} - -ksock_conn_t * -ksocknal_find_conn_locked (int payload_nob, ksock_peer_t *peer) -{ - struct list_head *tmp; - ksock_conn_t *typed = NULL; - int tnob = 0; - ksock_conn_t *fallback = NULL; - int fnob = 0; - ksock_conn_t *conn; - - list_for_each (tmp, &peer->ksnp_conns) { - ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list); - int hdr_nob = 0; -#if SOCKNAL_ROUND_ROBIN - const int nob = 0; -#else - int nob = atomic_read(&c->ksnc_tx_nob) + - SOCK_WMEM_QUEUED(c->ksnc_sock); -#endif - LASSERT (!c->ksnc_closing); - LASSERT (c->ksnc_proto != NULL); - - if (fallback == NULL || nob < fnob) { - fallback = c; - fnob = nob; - } - - if (!*ksocknal_tunables.ksnd_typed_conns) - continue; - - if (payload_nob == 0) { - /* noop packet */ - hdr_nob = offsetof(ksock_msg_t, ksm_u); - } else { - /* lnet packet */ - hdr_nob = (c->ksnc_proto == &ksocknal_protocol_v2x)? 
- offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload): - sizeof(lnet_hdr_t); - } - - switch (c->ksnc_type) { - default: - CERROR("ksnc_type bad: %u\n", c->ksnc_type); - LBUG(); - case SOCKLND_CONN_ANY: - break; - case SOCKLND_CONN_BULK_IN: - continue; - case SOCKLND_CONN_BULK_OUT: - if ((hdr_nob + payload_nob) < *ksocknal_tunables.ksnd_min_bulk) - continue; - break; - case SOCKLND_CONN_CONTROL: - if ((hdr_nob + payload_nob) >= *ksocknal_tunables.ksnd_min_bulk) - continue; - break; - } - - if (typed == NULL || nob < tnob) { - typed = c; - tnob = nob; - } - } - - /* prefer the typed selection */ - conn = (typed != NULL) ? typed : fallback; - -#if SOCKNAL_ROUND_ROBIN - if (conn != NULL) { - /* round-robin all else being equal */ - list_del (&conn->ksnc_list); - list_add_tail (&conn->ksnc_list, &peer->ksnp_conns); - } -#endif - return conn; -} - -void -ksocknal_next_mono_tx(ksock_conn_t *conn) -{ - ksock_tx_t *tx = conn->ksnc_tx_mono; - - /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x); - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - LASSERT(tx != NULL); - - if (tx->tx_list.next == &conn->ksnc_tx_queue) { - /* no more packets queued */ - conn->ksnc_tx_mono = NULL; - } else { - conn->ksnc_tx_mono = list_entry(tx->tx_list.next, ksock_tx_t, tx_list); - LASSERT(conn->ksnc_tx_mono->tx_msg.ksm_type == tx->tx_msg.ksm_type); - } -} - -int -ksocknal_piggyback_zcack(ksock_conn_t *conn, __u64 cookie) -{ - ksock_tx_t *tx = conn->ksnc_tx_mono; - - /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ - - if (tx == NULL) - return 0; - - if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) { - /* tx is noop zc-ack, can't piggyback zc-ack cookie */ - return 0; - } - - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET); - LASSERT(tx->tx_msg.ksm_zc_ack_cookie == 0); - - /* piggyback the zc-ack cookie */ - tx->tx_msg.ksm_zc_ack_cookie = cookie; - ksocknal_next_mono_tx(conn); - - return 1; -} - -void -ksocknal_queue_tx_locked 
(ksock_tx_t *tx, ksock_conn_t *conn) -{ - ksock_sched_t *sched = conn->ksnc_scheduler; - ksock_msg_t *msg = &tx->tx_msg; - ksock_tx_t *ztx; - int bufnob = 0; - - /* called holding global lock (read or irq-write) and caller may - * not have dropped this lock between finding conn and calling me, - * so we don't need the {get,put}connsock dance to deref - * ksnc_sock... */ - LASSERT(!conn->ksnc_closing); - - CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - - tx->tx_checked_zc = 0; - conn->ksnc_proto->pro_pack(tx); - - /* Ensure the frags we've been given EXACTLY match the number of - * bytes we want to send. Many TCP/IP stacks disregard any total - * size parameters passed to them and just look at the frags. - * - * We always expect at least 1 mapped fragment containing the - * complete ksocknal message header. */ - LASSERT (lnet_iov_nob (tx->tx_niov, tx->tx_iov) + - lnet_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob); - LASSERT (tx->tx_niov >= 1); - LASSERT (tx->tx_resid == tx->tx_nob); - - CDEBUG (D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n", - tx, (tx->tx_lnetmsg != NULL)? tx->tx_lnetmsg->msg_hdr.type: - KSOCK_MSG_NOOP, - tx->tx_nob, tx->tx_niov, tx->tx_nkiov); - - atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); - tx->tx_conn = conn; - ksocknal_conn_addref(conn); /* +1 ref for tx */ - - /* - * NB Darwin: SOCK_WMEM_QUEUED()->sock_getsockopt() will take - * a blockable lock(socket lock), so SOCK_WMEM_QUEUED can't be - * put in spinlock. 
- */ - bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock); - spin_lock_bh (&sched->kss_lock); - - if (list_empty(&conn->ksnc_tx_queue) && bufnob == 0) { - /* First packet starts the timeout */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - conn->ksnc_tx_bufnob = 0; - mb(); /* order with adding to tx_queue */ - } - - ztx = NULL; - - if (msg->ksm_type == KSOCK_MSG_NOOP) { - /* The packet is noop ZC ACK, try to piggyback the ack_cookie - * on a normal packet so I don't need to send it */ - LASSERT(msg->ksm_zc_req_cookie == 0); - LASSERT(msg->ksm_zc_ack_cookie != 0); - - if (conn->ksnc_tx_mono != NULL) { - if (ksocknal_piggyback_zcack(conn, msg->ksm_zc_ack_cookie)) { - /* zc-ack cookie is piggybacked */ - atomic_sub (tx->tx_nob, &conn->ksnc_tx_nob); - ztx = tx; /* Put to freelist later */ - } else { - /* no packet can piggyback zc-ack cookie */ - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - } else { - /* It's the first mono-packet */ - conn->ksnc_tx_mono = tx; - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - - } else { - /* It's a normal packet - can it piggback a noop zc-ack that - * has been queued already? 
*/ - LASSERT(msg->ksm_zc_ack_cookie == 0); - - if (conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x packet */ - conn->ksnc_tx_mono != NULL) { - if (conn->ksnc_tx_mono->tx_msg.ksm_type == KSOCK_MSG_NOOP) { - /* There is a noop zc-ack can be piggybacked */ - ztx = conn->ksnc_tx_mono; - - msg->ksm_zc_ack_cookie = ztx->tx_msg.ksm_zc_ack_cookie; - ksocknal_next_mono_tx(conn); - - /* use tx to replace the noop zc-ack packet, ztx will - * be put to freelist later */ - list_add(&tx->tx_list, &ztx->tx_list); - list_del(&ztx->tx_list); - - atomic_sub (ztx->tx_nob, &conn->ksnc_tx_nob); - } else { - /* no noop zc-ack packet, just enqueue it */ - LASSERT(conn->ksnc_tx_mono->tx_msg.ksm_type == KSOCK_MSG_LNET); - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - - } else if (conn->ksnc_proto == &ksocknal_protocol_v2x) { - /* it's the first mono-packet, enqueue it */ - conn->ksnc_tx_mono = tx; - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } else { - /* V1.x packet, just enqueue it */ - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - } - - if (ztx != NULL) - list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs); - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) { /* not scheduled to send */ - /* +1 ref for scheduler */ - ksocknal_conn_addref(conn); - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - cfs_waitq_signal (&sched->kss_waitq); - } - - spin_unlock_bh (&sched->kss_lock); -} - -ksock_route_t * -ksocknal_find_connectable_route_locked (ksock_peer_t *peer) -{ - struct list_head *tmp; - ksock_route_t *route; - - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); - - LASSERT (!route->ksnr_connecting || route->ksnr_scheduled); - - if (route->ksnr_scheduled) /* connections being established */ - continue; - - /* all route types connected ? 
*/ - if ((ksocknal_route_mask() & ~route->ksnr_connected) == 0) - continue; - - /* too soon to retry this guy? */ - if (!(route->ksnr_retry_interval == 0 || /* first attempt */ - cfs_time_aftereq (cfs_time_current(), - route->ksnr_timeout))) - continue; - - return (route); - } - - return (NULL); -} - -ksock_route_t * -ksocknal_find_connecting_route_locked (ksock_peer_t *peer) -{ - struct list_head *tmp; - ksock_route_t *route; - - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); - - LASSERT (!route->ksnr_connecting || route->ksnr_scheduled); - - if (route->ksnr_scheduled) - return (route); - } - - return (NULL); -} - -int -ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) -{ - ksock_peer_t *peer; - ksock_conn_t *conn; - ksock_route_t *route; - rwlock_t *g_lock; - int retry; - int rc; - - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lnetmsg != NULL); - - g_lock = &ksocknal_data.ksnd_global_lock; - - for (retry = 0;; retry = 1) { -#if !SOCKNAL_ROUND_ROBIN - read_lock (g_lock); - peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) { - if (ksocknal_find_connectable_route_locked(peer) == NULL) { - conn = ksocknal_find_conn_locked (tx->tx_lnetmsg->msg_len, peer); - if (conn != NULL) { - /* I've got no routes that need to be - * connecting and I do have an actual - * connection... */ - ksocknal_queue_tx_locked (tx, conn); - read_unlock (g_lock); - return (0); - } - } - } - - /* I'll need a write lock... 
*/ - read_unlock (g_lock); -#endif - write_lock_bh (g_lock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) - break; - - write_unlock_bh (g_lock); - - if ((id.pid & LNET_PID_USERFLAG) != 0) { - CERROR("Refusing to create a connection to " - "userspace process %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - rc = ksocknal_add_peer(ni, id, - LNET_NIDADDR(id.nid), - lnet_acceptor_port()); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_id2str(id), rc); - return rc; - } - } - - for (;;) { - /* launch any/all connections that need it */ - route = ksocknal_find_connectable_route_locked (peer); - if (route == NULL) - break; - - ksocknal_launch_connection_locked (route); - } - - conn = ksocknal_find_conn_locked (tx->tx_lnetmsg->msg_len, peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - ksocknal_queue_tx_locked (tx, conn); - write_unlock_bh (g_lock); - return (0); - } - - if (peer->ksnp_accepting > 0 || - ksocknal_find_connecting_route_locked (peer) != NULL) { - /* Queue the message until a connection is established */ - list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); - write_unlock_bh (g_lock); - return 0; - } - - write_unlock_bh (g_lock); - - /* NB Routes may be ignored if connections to them failed recently */ - CDEBUG(D_NETERROR, "No usable routes to %s\n", libcfs_id2str(id)); - return (-EHOSTUNREACH); -} - -int -ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - ksock_tx_t *tx; - int desc_size; - int rc; - - /* NB 'private' is different depending on what we're 
sending. - * Just ignore it... */ - - CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - LASSERT (!in_interrupt ()); - - if (payload_iov != NULL) - desc_size = offsetof(ksock_tx_t, - tx_frags.virt.iov[1 + payload_niov]); - else - desc_size = offsetof(ksock_tx_t, - tx_frags.paged.kiov[payload_niov]); - - tx = ksocknal_alloc_tx(desc_size); - if (tx == NULL) { - CERROR("Can't allocate tx desc type %d size %d\n", - type, desc_size); - return (-ENOMEM); - } - - tx->tx_conn = NULL; /* set when assigned a conn */ - tx->tx_lnetmsg = lntmsg; - - if (payload_iov != NULL) { - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1 + - lnet_extract_iov(payload_niov, &tx->tx_iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); - } else { - tx->tx_niov = 1; - tx->tx_iov = &tx->tx_frags.paged.iov; - tx->tx_kiov = tx->tx_frags.paged.kiov; - tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov, - payload_niov, payload_kiov, - payload_offset, payload_nob); - } - - ksocknal_init_msg(&tx->tx_msg, KSOCK_MSG_LNET); - - /* The first fragment will be set later in pro_pack */ - rc = ksocknal_launch_packet(ni, tx, target); - if (rc == 0) - return (0); - - ksocknal_free_tx(tx); - return (-EIO); -} - -int -ksocknal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = cfs_kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads++; - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - return (0); -} - -void -ksocknal_thread_fini (void) -{ - write_lock_bh (&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads--; - write_unlock_bh (&ksocknal_data.ksnd_global_lock); -} 
- -int -ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) -{ - static char ksocknal_slop_buffer[4096]; - - int nob; - unsigned int niov; - int skipped; - - LASSERT(conn->ksnc_proto != NULL); - - if ((*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0) { - /* Remind the socket to ack eagerly... */ - ksocknal_lib_eager_ack(conn); - } - - if (nob_to_skip == 0) { /* right at next packet boundary now */ - conn->ksnc_rx_started = 0; - mb (); /* racing with timeout thread */ - - switch (conn->ksnc_proto->pro_version) { - case KSOCK_PROTO_V2: - conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER; - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg; - - conn->ksnc_rx_nob_wanted = offsetof(ksock_msg_t, ksm_u); - conn->ksnc_rx_nob_left = offsetof(ksock_msg_t, ksm_u); - conn->ksnc_rx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u); - break; - - case KSOCK_PROTO_V1: - /* Receiving bare lnet_hdr_t */ - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(lnet_hdr_t); - conn->ksnc_rx_nob_left = sizeof(lnet_hdr_t); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof (lnet_hdr_t); - break; - - default: - LBUG (); - } - conn->ksnc_rx_niov = 1; - - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_csum = ~0; - return (1); - } - - /* Set up to skip as much as possible now. 
If there's more left - * (ran out of iov entries) we'll get called again */ - - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - skipped = 0; - niov = 0; - - do { - nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer)); - - conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_nob_wanted = skipped; - return (0); -} - -/* (Sink) handle incoming ZC request from sender */ -static int -ksocknal_handle_zc_req(ksock_peer_t *peer, __u64 cookie) -{ - ksock_conn_t *conn; - ksock_tx_t *tx; - ksock_sched_t *sched; - int rc; - - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = ksocknal_find_conn_locked (0, peer); - if (conn == NULL) { - read_unlock (&ksocknal_data.ksnd_global_lock); - CERROR("Can't find connection to send zcack.\n"); - return -ECONNRESET; - } - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - rc = ksocknal_piggyback_zcack(conn, cookie); - spin_unlock_bh (&sched->kss_lock); - - read_unlock (&ksocknal_data.ksnd_global_lock); - if (rc) { - /* Ack cookie is piggybacked */ - return 0; - } - - tx = ksocknal_alloc_tx(KSOCK_NOOP_TX_SIZE); - if (tx == NULL) { - CERROR("Can't allocate noop tx desc\n"); - return -ENOMEM; - } - - tx->tx_conn = NULL; - tx->tx_lnetmsg = NULL; - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1; - - ksocknal_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP); - tx->tx_msg.ksm_zc_ack_cookie = cookie; /* incoming cookie */ - - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = ksocknal_find_conn_locked (0, peer); - if (conn == NULL) { - read_unlock 
(&ksocknal_data.ksnd_global_lock); - ksocknal_free_tx(tx); - CERROR("Can't find connection to send zcack.\n"); - return -ECONNRESET; - } - ksocknal_queue_tx_locked(tx, conn); - - read_unlock (&ksocknal_data.ksnd_global_lock); - - return 0; -} - -/* (Sender) handle ZC_ACK from sink */ -static int -ksocknal_handle_zc_ack(ksock_peer_t *peer, __u64 cookie) -{ - ksock_tx_t *tx; - struct list_head *ctmp; - - spin_lock(&peer->ksnp_lock); - - list_for_each(ctmp, &peer->ksnp_zc_req_list) { - tx = list_entry (ctmp, ksock_tx_t, tx_zc_list); - if (tx->tx_msg.ksm_zc_req_cookie != cookie) - continue; - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - - spin_unlock(&peer->ksnp_lock); - - ksocknal_tx_decref(tx); - return 0; - } - spin_unlock(&peer->ksnp_lock); - - return -EPROTO; -} - -int -ksocknal_process_receive (ksock_conn_t *conn) -{ - int rc; - - LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); - - /* NB: sched lock NOT held */ - /* SOCKNAL_RX_LNET_HEADER is here for backward compatability */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - again: - if (conn->ksnc_rx_nob_wanted != 0) { - rc = ksocknal_receive(conn); - - if (rc <= 0) { - LASSERT (rc != -EAGAIN); - - if (rc == 0) - CDEBUG (D_NET, "[%p] EOF from %s" - " ip %d.%d.%d.%d:%d\n", conn, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - else if (!conn->ksnc_closing) - CERROR ("[%p] Error %d on read from %s" - " ip %d.%d.%d.%d:%d\n", - conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings (conn, - (conn->ksnc_closing) ? 0 : rc); - return (rc == 0 ? 
-ESHUTDOWN : rc); - } - - if (conn->ksnc_rx_nob_wanted != 0) { - /* short read */ - return (-EAGAIN); - } - } - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_flip) { - __swab32s(&conn->ksnc_msg.ksm_type); - __swab32s(&conn->ksnc_msg.ksm_csum); - __swab64s(&conn->ksnc_msg.ksm_zc_req_cookie); - __swab64s(&conn->ksnc_msg.ksm_zc_ack_cookie); - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - /* NOOP Checksum error */ - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return (-EIO); - } - - if (conn->ksnc_msg.ksm_zc_ack_cookie != 0) { - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x); - - rc = ksocknal_handle_zc_ack(conn->ksnc_peer, - conn->ksnc_msg.ksm_zc_ack_cookie); - if (rc != 0) { - CERROR("%s: Unknown zero copy ACK cookie: "LPU64"\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_zc_ack_cookie); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return (rc); - } - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) { - ksocknal_new_packet (conn, 0); - return 0; /* NOOP is done and just return */ - } - LASSERT (conn->ksnc_msg.ksm_type == KSOCK_MSG_LNET); - - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t); - conn->ksnc_rx_nob_left = sizeof(ksock_lnet_msg_t); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof(ksock_lnet_msg_t); - - conn->ksnc_rx_niov = 1; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - - goto again; /* read lnet header now */ - - case SOCKNAL_RX_LNET_HEADER: - /* unpack message header 
*/ - conn->ksnc_proto->pro_unpack(&conn->ksnc_msg); - - if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) { - /* Userspace peer */ - lnet_process_id_t *id = &conn->ksnc_peer->ksnp_id; - lnet_hdr_t *lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - - /* Substitute process ID assigned at connection time */ - lhdr->src_pid = cpu_to_le32(id->pid); - lhdr->src_nid = cpu_to_le64(id->nid); - } - - conn->ksnc_rx_state = SOCKNAL_RX_PARSE; - ksocknal_conn_addref(conn); /* ++ref while parsing */ - - rc = lnet_parse(conn->ksnc_peer->ksnp_ni, - &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr, - conn->ksnc_peer->ksnp_id.nid, conn, 0); - if (rc < 0) { - /* I just received garbage: give up on this conn */ - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); - ksocknal_conn_decref(conn); - return (-EPROTO); - } - - /* I'm racing with ksocknal_recv() */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); - - if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD) - return 0; - - /* ksocknal_recv() got called */ - goto again; - - case SOCKNAL_RX_LNET_PAYLOAD: - /* payload all received */ - rc = 0; - - if (conn->ksnc_rx_nob_left == 0 && /* not truncating */ - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - rc = -EIO; - } - - lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc); - - if (rc == 0 && conn->ksnc_msg.ksm_zc_req_cookie != 0) { - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x); - rc = ksocknal_handle_zc_req(conn->ksnc_peer, - conn->ksnc_msg.ksm_zc_req_cookie); - } - - if (rc != 0) { - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); - return (-EPROTO); - } - /* Fall through */ - - case SOCKNAL_RX_SLOP: - /* starting new packet? 
*/ - if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left)) - return 0; /* come back later */ - goto again; /* try to finish reading slop now */ - - default: - break; - } - - /* Not Reached */ - LBUG (); - return (-EINVAL); /* keep gcc happy */ -} - -int -ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - ksock_sched_t *sched = conn->ksnc_scheduler; - - LASSERT (mlen <= rlen); - LASSERT (niov <= LNET_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - if (mlen == 0 || iov != NULL) { - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; - conn->ksnc_rx_niov = - lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov, - niov, iov, offset, mlen); - } else { - conn->ksnc_rx_niov = 0; - conn->ksnc_rx_iov = NULL; - conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; - conn->ksnc_rx_nkiov = - lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov, - niov, kiov, offset, mlen); - } - - LASSERT (mlen == - lnet_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lnet_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); - - LASSERT (conn->ksnc_rx_scheduled); - - spin_lock_bh (&sched->kss_lock); - - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_PARSE_WAIT: - list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); - cfs_waitq_signal (&sched->kss_waitq); - LASSERT (conn->ksnc_rx_ready); - break; - - case SOCKNAL_RX_PARSE: - /* scheduler hasn't noticed I'm parsing yet */ - break; - } - - conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD; - - spin_unlock_bh (&sched->kss_lock); - ksocknal_conn_decref(conn); - return (0); -} - -static inline int -ksocknal_sched_cansleep(ksock_sched_t *sched) -{ - int rc; - - spin_lock_bh (&sched->kss_lock); - - rc = (!ksocknal_data.ksnd_shuttingdown && - 
list_empty(&sched->kss_rx_conns) && - list_empty(&sched->kss_tx_conns)); - - spin_unlock_bh (&sched->kss_lock); - return (rc); -} - -int ksocknal_scheduler (void *arg) -{ - ksock_sched_t *sched = (ksock_sched_t *)arg; - ksock_conn_t *conn; - ksock_tx_t *tx; - int rc; - int nloops = 0; - int id = sched - ksocknal_data.ksnd_schedulers; - char name[16]; - - snprintf (name, sizeof (name),"socknal_sd%02d", id); - cfs_daemonize (name); - cfs_block_allsigs (); - -#if defined(CONFIG_SMP) && defined(CPU_AFFINITY) - id = ksocknal_sched2cpu(id); - if (cpu_online(id)) { - cpumask_t m = CPU_MASK_NONE; - cpu_set(id, m); - set_cpus_allowed(current, m); - } else { - CERROR ("Can't set CPU affinity for %s to %d\n", name, id); - } -#endif /* CONFIG_SMP && CPU_AFFINITY */ - - spin_lock_bh (&sched->kss_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty (&sched->kss_rx_conns)) { - conn = list_entry(sched->kss_rx_conns.next, - ksock_conn_t, ksnc_rx_list); - list_del(&conn->ksnc_rx_list); - - LASSERT(conn->ksnc_rx_scheduled); - LASSERT(conn->ksnc_rx_ready); - - /* clear rx_ready in case receive isn't complete. - * Do it BEFORE we call process_recv, since - * data_ready can set it any time after we release - * kss_lock. */ - conn->ksnc_rx_ready = 0; - spin_unlock_bh (&sched->kss_lock); - - rc = ksocknal_process_receive(conn); - - spin_lock_bh (&sched->kss_lock); - - /* I'm the only one that can clear this flag */ - LASSERT(conn->ksnc_rx_scheduled); - - /* Did process_receive get everything it wanted? 
*/ - if (rc == 0) - conn->ksnc_rx_ready = 1; - - if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) { - /* Conn blocked waiting for ksocknal_recv() - * I change its state (under lock) to signal - * it can be rescheduled */ - conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT; - } else if (conn->ksnc_rx_ready) { - /* reschedule for rx */ - list_add_tail (&conn->ksnc_rx_list, - &sched->kss_rx_conns); - } else { - conn->ksnc_rx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - - if (!list_empty (&sched->kss_tx_conns)) { - CFS_LIST_HEAD (zlist); - - if (!list_empty(&sched->kss_zombie_noop_txs)) { - list_add(&zlist, &sched->kss_zombie_noop_txs); - list_del_init(&sched->kss_zombie_noop_txs); - } - - conn = list_entry(sched->kss_tx_conns.next, - ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); - - LASSERT(conn->ksnc_tx_scheduled); - LASSERT(conn->ksnc_tx_ready); - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - - tx = list_entry(conn->ksnc_tx_queue.next, - ksock_tx_t, tx_list); - - if (conn->ksnc_tx_mono == tx) - ksocknal_next_mono_tx(conn); - - /* dequeue now so empty list => more to send */ - list_del(&tx->tx_list); - - /* Clear tx_ready in case send isn't complete. Do - * it BEFORE we call process_transmit, since - * write_space can set it any time after we release - * kss_lock. 
*/ - conn->ksnc_tx_ready = 0; - spin_unlock_bh (&sched->kss_lock); - - if (!list_empty(&zlist)) { - /* free zombie noop txs, it's fast because - * noop txs are just put in freelist */ - ksocknal_txlist_done(NULL, &zlist, 0); - } - - rc = ksocknal_process_transmit(conn, tx); - - if (rc == -ENOMEM || rc == -EAGAIN) { - /* Incomplete send: replace tx on HEAD of tx_queue */ - spin_lock_bh (&sched->kss_lock); - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } else { - /* Complete send; tx -ref */ - ksocknal_tx_decref (tx); - - spin_lock_bh (&sched->kss_lock); - /* assume space for more */ - conn->ksnc_tx_ready = 1; - } - - if (rc == -ENOMEM) { - /* Do nothing; after a short timeout, this - * conn will be reposted on kss_tx_conns. */ - } else if (conn->ksnc_tx_ready && - !list_empty (&conn->ksnc_tx_queue)) { - /* reschedule for tx */ - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - } else { - conn->ksnc_tx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */ - spin_unlock_bh (&sched->kss_lock); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ - rc = wait_event_interruptible_exclusive( - sched->kss_waitq, - !ksocknal_sched_cansleep(sched)); - LASSERT (rc == 0); - } else { - our_cond_resched(); - } - - spin_lock_bh (&sched->kss_lock); - } - } - - spin_unlock_bh (&sched->kss_lock); - ksocknal_thread_fini (); - return (0); -} - -/* - * Add connection to kss_rx_conns of scheduler - * and wakeup the scheduler. 
- */ -void ksocknal_read_callback (ksock_conn_t *conn) -{ - ksock_sched_t *sched; - ENTRY; - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - - conn->ksnc_rx_ready = 1; - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - cfs_waitq_signal (&sched->kss_waitq); - } - spin_unlock_bh (&sched->kss_lock); - - EXIT; -} - -/* - * Add connection to kss_tx_conns of scheduler - * and wakeup the scheduler. - */ -void ksocknal_write_callback (ksock_conn_t *conn) -{ - ksock_sched_t *sched; - ENTRY; - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && // not being progressed - !list_empty(&conn->ksnc_tx_queue)){//packets to send - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - cfs_waitq_signal (&sched->kss_waitq); - } - - spin_unlock_bh (&sched->kss_lock); - - EXIT; -} - -ksock_proto_t * -ksocknal_parse_proto_version (ksock_hello_msg_t *hello) -{ - if ((hello->kshm_magic == LNET_PROTO_MAGIC && - hello->kshm_version == KSOCK_PROTO_V2) || - (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC) && - hello->kshm_version == __swab32(KSOCK_PROTO_V2))) { -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol != 2) - return NULL; -#endif - return &ksocknal_protocol_v2x; - } - - if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { - lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello; - - CLASSERT (sizeof (lnet_magicversion_t) == - offsetof (ksock_hello_msg_t, kshm_src_nid)); - - if (hmv->version_major == cpu_to_le16 (KSOCK_PROTO_V1_MAJOR) && - hmv->version_minor == cpu_to_le16 (KSOCK_PROTO_V1_MINOR)) - return &ksocknal_protocol_v1x; - } - - return NULL; -} - -static int 
-ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello) -{ - cfs_socket_t *sock = conn->ksnc_sock; - lnet_hdr_t *hdr; - lnet_magicversion_t *hmv; - int rc; - int i; - - CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid)); - - LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { - CERROR("Can't allocate lnet_hdr_t\n"); - return -ENOMEM; - } - - hmv = (lnet_magicversion_t *)&hdr->dest_nid; - - /* Re-organize V2.x message header to V1.x (lnet_hdr_t) - * header and send out */ - hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); - hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR); - hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hmv->version_major++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - hmv->magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid); - hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid); - hdr->type = cpu_to_le32 (LNET_MSG_HELLO); - hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32)); - hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype); - hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation); - - rc = libcfs_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); - - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", - rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - goto out; - } - - if (hello->kshm_nips == 0) - goto out; - - for (i = 0; i < hello->kshm_nips; i++) { - hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]); - } - - rc = libcfs_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO payload (%d)" - " to 
%u.%u.%u.%u/%d\n", rc, hello->kshm_nips, - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - } -out: - LIBCFS_FREE(hdr, sizeof(*hdr)); - - return rc; -} - -static int -ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello) -{ - cfs_socket_t *sock = conn->ksnc_sock; - int rc; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = KSOCK_PROTO_V2; - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hello->kshm_version++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - LNET_UNLOCK(); - } - - rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips), - lnet_acceptor_timeout()); - - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", - rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - return rc; - } - - if (hello->kshm_nips == 0) - return 0; - - rc = libcfs_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO payload (%d)" - " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips, - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - } - - return rc; -} - -static int -ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout) -{ - cfs_socket_t *sock = conn->ksnc_sock; - lnet_hdr_t *hdr; - int rc; - int i; - - LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { - CERROR("Can't allocate lnet_hdr_t\n"); - return -ENOMEM; - } - - rc = libcfs_sock_read(sock, &hdr->src_nid, - sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid), - timeout); - if (rc != 0) { - CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - goto out; - } - - /* ...and check we got what we expected */ - if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr," - " but got type %d from %u.%u.%u.%u\n", - 
le32_to_cpu (hdr->type), - HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - goto out; - } - - hello->kshm_src_nid = le64_to_cpu (hdr->src_nid); - hello->kshm_src_pid = le32_to_cpu (hdr->src_pid); - hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation); - hello->kshm_ctype = le32_to_cpu (hdr->msg.hello.type); - hello->kshm_nips = le32_to_cpu (hdr->payload_length) / - sizeof (__u32); - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u\n", - hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - goto out; - } - - if (hello->kshm_nips == 0) - goto out; - - rc = libcfs_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - goto out; - } - - for (i = 0; i < hello->kshm_nips; i++) { - hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); - - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n", - i, HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - break; - } - } -out: - LIBCFS_FREE(hdr, sizeof(*hdr)); - - return rc; -} - -static int -ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout) -{ - cfs_socket_t *sock = conn->ksnc_sock; - int rc; - int i; - - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->ksnc_flip = 0; - else - conn->ksnc_flip = 1; - - rc = libcfs_sock_read(sock, &hello->kshm_src_nid, - offsetof(ksock_hello_msg_t, kshm_ips) - - offsetof(ksock_hello_msg_t, kshm_src_nid), - timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } - - if (conn->ksnc_flip) { - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - 
__swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u\n", - hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - - if (hello->kshm_nips == 0) - return 0; - - rc = libcfs_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } - - for (i = 0; i < hello->kshm_nips; i++) { - if (conn->ksnc_flip) - __swab32s(&hello->kshm_ips[i]); - - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n", - i, HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - } - - return 0; -} - -static void -ksocknal_pack_msg_v1(ksock_tx_t *tx) -{ - /* V1.x has no KSOCK_MSG_NOOP */ - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_lnetmsg != NULL); - - tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t); - - tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); -} - -static void -ksocknal_pack_msg_v2(ksock_tx_t *tx) -{ - tx->tx_iov[0].iov_base = (void *)&tx->tx_msg; - - if (tx->tx_lnetmsg != NULL) { - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload); - tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload) + - tx->tx_lnetmsg->msg_len; - } else { - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - } - /* Don't checksum before start sending, because packet can be piggybacked with ACK */ -} - -static void 
-ksocknal_unpack_msg_v1(ksock_msg_t *msg) -{ - msg->ksm_type = KSOCK_MSG_LNET; - msg->ksm_csum = 0; - msg->ksm_zc_req_cookie = 0; - msg->ksm_zc_ack_cookie = 0; -} - -static void -ksocknal_unpack_msg_v2(ksock_msg_t *msg) -{ - return; /* Do nothing */ -} - -ksock_proto_t ksocknal_protocol_v1x = -{ - KSOCK_PROTO_V1, - ksocknal_send_hello_v1, - ksocknal_recv_hello_v1, - ksocknal_pack_msg_v1, - ksocknal_unpack_msg_v1 -}; - -ksock_proto_t ksocknal_protocol_v2x = -{ - KSOCK_PROTO_V2, - ksocknal_send_hello_v2, - ksocknal_recv_hello_v2, - ksocknal_pack_msg_v2, - ksocknal_unpack_msg_v2 -}; - -int -ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, - lnet_nid_t peer_nid, ksock_hello_msg_t *hello) -{ - /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ - ksock_net_t *net = (ksock_net_t *)ni->ni_data; - lnet_nid_t srcnid; - - LASSERT (0 <= hello->kshm_nips && hello->kshm_nips <= LNET_MAX_INTERFACES); - - /* rely on caller to hold a ref on socket so it wouldn't disappear */ - LASSERT (conn->ksnc_proto != NULL); - - srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, peer_nid); - - hello->kshm_src_nid = srcnid; - hello->kshm_dst_nid = peer_nid; - hello->kshm_src_pid = the_lnet.ln_pid; - - hello->kshm_src_incarnation = net->ksnn_incarnation; - hello->kshm_ctype = conn->ksnc_type; - - return conn->ksnc_proto->pro_send_hello(conn, hello); -} - -int -ksocknal_invert_type(int type) -{ - switch (type) - { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return (type); - case SOCKLND_CONN_BULK_IN: - return SOCKLND_CONN_BULK_OUT; - case SOCKLND_CONN_BULK_OUT: - return SOCKLND_CONN_BULK_IN; - default: - return (SOCKLND_CONN_NONE); - } -} - -int -ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, - ksock_hello_msg_t *hello, lnet_process_id_t *peerid, - __u64 *incarnation) -{ - /* Return < 0 fatal error - * 0 success - * EALREADY lost connection race - * EPROTO protocol version mismatch - */ - cfs_socket_t *sock = conn->ksnc_sock; - int active = (conn->ksnc_proto != NULL); - int 
timeout; - int proto_match; - int rc; - ksock_proto_t *proto; - lnet_process_id_t recv_id; - - /* socket type set on active connections - not set on passive */ - LASSERT (!active == !(conn->ksnc_type != SOCKLND_CONN_NONE)); - - timeout = active ? *ksocknal_tunables.ksnd_timeout : - lnet_acceptor_timeout(); - - rc = libcfs_sock_read(sock, &hello->kshm_magic, sizeof (hello->kshm_magic), timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0); - return rc; - } - - if (hello->kshm_magic != LNET_PROTO_MAGIC && - hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) && - hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) { - /* Unexpected magic! */ - if (active || - the_lnet.ln_ptlcompat == 0) { - CERROR ("Bad magic(1) %#08x (%#08x expected) from " - "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic), - LNET_PROTO_TCP_MAGIC, - HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - - /* When portals compatibility is set, I may be passed a new - * connection "blindly" by the acceptor, and I have to - * determine if my peer has sent an acceptor connection request - * or not. This isn't a 'hello', so I'll get the acceptor to - * look at it... */ - rc = lnet_accept(ni, sock, hello->kshm_magic); - if (rc != 0) - return -EPROTO; - - /* ...and if it's OK I'm back to looking for a 'hello'... 
*/ - rc = libcfs_sock_read(sock, &hello->kshm_magic, - sizeof (hello->kshm_magic), timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0); - return rc; - } - - /* Only need to check V1.x magic */ - if (hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) { - CERROR ("Bad magic(2) %#08x (%#08x expected) from " - "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic), - LNET_PROTO_TCP_MAGIC, - HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - } - - rc = libcfs_sock_read(sock, &hello->kshm_version, - sizeof(hello->kshm_version), timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0); - return rc; - } - - proto = ksocknal_parse_proto_version(hello); - if (proto == NULL) { - if (!active) { - /* unknown protocol from peer, tell peer my protocol */ - conn->ksnc_proto = &ksocknal_protocol_v2x; -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol != 2) - conn->ksnc_proto = &ksocknal_protocol_v1x; -#endif - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, ni->ni_nid, hello); - } - - CERROR ("Unknown protocol version (%d.x expected)" - " from %u.%u.%u.%u\n", - conn->ksnc_proto->pro_version, - HIPQUAD(conn->ksnc_ipaddr)); - - return -EPROTO; - } - - proto_match = (conn->ksnc_proto == proto); - conn->ksnc_proto = proto; - - /* receive the rest of hello message anyway */ - rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout); - if (rc != 0) { - CERROR("Error %d reading or checking hello from from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0); - return rc; - } - - *incarnation = hello->kshm_src_incarnation; - - if (hello->kshm_src_nid == LNET_NID_ANY) { - CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY" - "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - - if (!active && - conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - /* 
Userspace NAL assigns peer process ID from socket */ - recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG; - recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr); - } else { - recv_id.nid = hello->kshm_src_nid; - - if (the_lnet.ln_ptlcompat > 1 && /* portals peers may exist */ - LNET_NIDNET(recv_id.nid) == 0) /* this is one */ - recv_id.pid = the_lnet.ln_pid; /* give it a sensible pid */ - else - recv_id.pid = hello->kshm_src_pid; - - } - - if (!active) { - *peerid = recv_id; - - /* peer determines type */ - conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype); - if (conn->ksnc_type == SOCKLND_CONN_NONE) { - CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n", - hello->kshm_ctype, libcfs_id2str(*peerid), - HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - - return 0; - } - - if (peerid->pid != recv_id.pid || - !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) { - LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host" - " %u.%u.%u.%u, but they claimed they were " - "%s; please check your Lustre " - "configuration.\n", - libcfs_id2str(*peerid), - HIPQUAD(conn->ksnc_ipaddr), - libcfs_id2str(recv_id)); - return -EPROTO; - } - - if (hello->kshm_ctype == SOCKLND_CONN_NONE) { - /* Possible protocol mismatch or I lost the connection race */ - return proto_match ? 
EALREADY : EPROTO; - } - - if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) { - CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n", - conn->ksnc_type, libcfs_id2str(*peerid), - HIPQUAD(conn->ksnc_ipaddr), - hello->kshm_ctype); - return -EPROTO; - } - - return 0; -} - -void -ksocknal_connect (ksock_route_t *route) -{ - CFS_LIST_HEAD (zombies); - ksock_peer_t *peer = route->ksnr_peer; - int type; - int wanted; - cfs_socket_t *sock; - cfs_time_t deadline; - int retry_later = 0; - int rc = 0; - - deadline = cfs_time_add(cfs_time_current(), - cfs_time_seconds(*ksocknal_tunables.ksnd_timeout)); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - LASSERT (route->ksnr_scheduled); - LASSERT (!route->ksnr_connecting); - - route->ksnr_connecting = 1; - - for (;;) { - wanted = ksocknal_route_mask() & ~route->ksnr_connected; - - /* stop connecting if peer/route got closed under me, or - * route got connected while queued */ - if (peer->ksnp_closing || route->ksnr_deleted || - wanted == 0) { - retry_later = 0; - break; - } - - /* reschedule if peer is connecting to me */ - if (peer->ksnp_accepting > 0) { - CDEBUG(D_NET, - "peer %s(%d) already connecting to me, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting); - retry_later = 1; - } - - if (retry_later) /* needs reschedule */ - break; - - if ((wanted & (1 << SOCKLND_CONN_ANY)) != 0) { - type = SOCKLND_CONN_ANY; - } else if ((wanted & (1 << SOCKLND_CONN_CONTROL)) != 0) { - type = SOCKLND_CONN_CONTROL; - } else if ((wanted & (1 << SOCKLND_CONN_BULK_IN)) != 0) { - type = SOCKLND_CONN_BULK_IN; - } else { - LASSERT ((wanted & (1 << SOCKLND_CONN_BULK_OUT)) != 0); - type = SOCKLND_CONN_BULK_OUT; - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - if (cfs_time_aftereq(cfs_time_current(), deadline)) { - rc = -ETIMEDOUT; - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - rc = lnet_connect(&sock, 
peer->ksnp_id.nid, - route->ksnr_myipaddr, - route->ksnr_ipaddr, route->ksnr_port); - if (rc != 0) - goto failed; - - rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); - if (rc < 0) { - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - /* A +ve RC means I have to retry because I lost the connection - * race or I have to renegotiate protocol version */ - retry_later = (rc != 0); - if (retry_later) - CDEBUG(D_NET, "peer %s: conn race, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid)); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - } - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - if (retry_later) { - /* re-queue for attention; this frees me up to handle - * the peer's incoming connection request */ - ksocknal_launch_connection_locked(route); - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - return; - - failed: - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - /* This is a retry rather than a new connection */ - route->ksnr_retry_interval *= 2; - route->ksnr_retry_interval = - MAX(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000); - route->ksnr_retry_interval = - MIN(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000); - - LASSERT (route->ksnr_retry_interval != 0); - route->ksnr_timeout = cfs_time_add(cfs_time_current(), - route->ksnr_retry_interval); - - if (!list_empty(&peer->ksnp_tx_queue) && - peer->ksnp_accepting == 0 && - ksocknal_find_connecting_route_locked(peer) == NULL) { - /* ksnp_tx_queue is queued on a conn on successful - * connection */ - LASSERT (list_empty (&peer->ksnp_conns)); - - /* take all the blocked packets while I've got the lock and - * complete below... 
*/ - list_add(&zombies, &peer->ksnp_tx_queue); - list_del_init(&peer->ksnp_tx_queue); - } - -#if 0 /* irrelevent with only eager routes */ - if (!route->ksnr_deleted) { - /* make this route least-favourite for re-selection */ - list_del(&route->ksnr_list); - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - } -#endif - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - ksocknal_peer_failed(peer); - ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1); -} - -static inline int -ksocknal_connd_connect_route_locked(void) -{ - /* Only handle an outgoing connection request if there is someone left - * to handle incoming connections */ - return !list_empty(&ksocknal_data.ksnd_connd_routes) && - ((ksocknal_data.ksnd_connd_connecting + 1) < - *ksocknal_tunables.ksnd_nconnds); -} - -static inline int -ksocknal_connd_ready(void) -{ - int rc; - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - rc = ksocknal_data.ksnd_shuttingdown || - !list_empty(&ksocknal_data.ksnd_connd_connreqs) || - ksocknal_connd_connect_route_locked(); - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - return rc; -} - -int -ksocknal_connd (void *arg) -{ - long id = (long)arg; - char name[16]; - ksock_connreq_t *cr; - ksock_route_t *route; - - snprintf (name, sizeof (name), "socknal_cd%02ld", id); - cfs_daemonize (name); - cfs_block_allsigs (); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - - if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) { - /* Connection accepted by the listener */ - cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next, - ksock_connreq_t, ksncr_list); - - list_del(&cr->ksncr_list); - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - ksocknal_create_conn(cr->ksncr_ni, NULL, - cr->ksncr_sock, SOCKLND_CONN_NONE); - lnet_ni_decref(cr->ksncr_ni); - LIBCFS_FREE(cr, sizeof(*cr)); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - } - - if (ksocknal_connd_connect_route_locked()) { - /* Connection request */ - route 
= list_entry (ksocknal_data.ksnd_connd_routes.next, - ksock_route_t, ksnr_connd_list); - - list_del (&route->ksnr_connd_list); - ksocknal_data.ksnd_connd_connecting++; - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - ksocknal_connect (route); - ksocknal_route_decref(route); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - ksocknal_data.ksnd_connd_connecting--; - } - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - wait_event_interruptible_exclusive( - ksocknal_data.ksnd_connd_waitq, - ksocknal_connd_ready()); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - } - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - ksocknal_thread_fini (); - return (0); -} - -ksock_conn_t * -ksocknal_find_timed_out_conn (ksock_peer_t *peer) -{ - /* We're called with a shared lock on ksnd_global_lock */ - ksock_conn_t *conn; - struct list_head *ctmp; - - list_for_each (ctmp, &peer->ksnp_conns) { - int error; - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - - /* Don't need the {get,put}connsock dance to deref ksnc_sock */ - LASSERT (!conn->ksnc_closing); - - /* SOCK_ERROR will reset error code of socket in - * some platform (like Darwin8.x) */ - error = SOCK_ERROR(conn->ksnc_sock); - if (error != 0) { - ksocknal_conn_addref(conn); - - switch (error) { - case ECONNRESET: - CDEBUG(D_NETERROR, "A connection with %s " - "(%u.%u.%u.%u:%d) was reset; " - "it may have rebooted.\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - break; - case ETIMEDOUT: - CDEBUG(D_NETERROR, "A connection with %s " - "(%u.%u.%u.%u:%d) timed out; the " - "network or node may be down.\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - break; - default: - CDEBUG(D_NETERROR, "An unexpected network error %d " - "occurred with %s " - "(%u.%u.%u.%u:%d\n", error, - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - break; - } - - return (conn); - } - - if (conn->ksnc_rx_started && - 
cfs_time_aftereq(cfs_time_current(), - conn->ksnc_rx_deadline)) { - /* Timed out incomplete incoming message */ - ksocknal_conn_addref(conn); - CDEBUG(D_NETERROR, "Timeout receiving from %s " - "(%u.%u.%u.%u:%d), state %d wanted %d left %d\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port, - conn->ksnc_rx_state, - conn->ksnc_rx_nob_wanted, - conn->ksnc_rx_nob_left); - return (conn); - } - - if ((!list_empty(&conn->ksnc_tx_queue) || - SOCK_WMEM_QUEUED(conn->ksnc_sock) != 0) && - cfs_time_aftereq(cfs_time_current(), - conn->ksnc_tx_deadline)) { - /* Timed out messages queued for sending or - * buffered in the socket's send buffer */ - ksocknal_conn_addref(conn); - CDEBUG(D_NETERROR, "Timeout sending data to %s " - "(%u.%u.%u.%u:%d) the network or that " - "node may be down.\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - return (conn); - } - } - - return (NULL); -} - -void -ksocknal_check_peer_timeouts (int idx) -{ - struct list_head *peers = &ksocknal_data.ksnd_peers[idx]; - struct list_head *ptmp; - ksock_peer_t *peer; - ksock_conn_t *conn; - - again: - /* NB. We expect to have a look at all the peers and not find any - * connections to time out, so we just use a shared lock while we - * take a look... */ - read_lock (&ksocknal_data.ksnd_global_lock); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - conn = ksocknal_find_timed_out_conn (peer); - - if (conn != NULL) { - read_unlock (&ksocknal_data.ksnd_global_lock); - - ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); - - /* NB we won't find this one again, but we can't - * just proceed with the next peer, since we dropped - * ksnd_global_lock and it might be dead already! 
*/ - ksocknal_conn_decref(conn); - goto again; - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); -} - -int -ksocknal_reaper (void *arg) -{ - cfs_waitlink_t wait; - ksock_conn_t *conn; - ksock_sched_t *sched; - struct list_head enomem_conns; - int nenomem_conns; - cfs_duration_t timeout; - int i; - int peer_index = 0; - cfs_time_t deadline = cfs_time_current(); - - cfs_daemonize ("socknal_reaper"); - cfs_block_allsigs (); - - CFS_INIT_LIST_HEAD(&enomem_conns); - cfs_waitlink_init (&wait); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - - if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) { - conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - ksocknal_terminate_conn (conn); - ksocknal_conn_decref(conn); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) { - conn = list_entry (ksocknal_data.ksnd_zombie_conns.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - ksocknal_destroy_conn (conn); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) { - list_add(&enomem_conns, &ksocknal_data.ksnd_enomem_conns); - list_del_init(&ksocknal_data.ksnd_enomem_conns); - } - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - /* reschedule all the connections that stalled with ENOMEM... 
*/ - nenomem_conns = 0; - while (!list_empty (&enomem_conns)) { - conn = list_entry (enomem_conns.next, - ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - - LASSERT (conn->ksnc_tx_scheduled); - conn->ksnc_tx_ready = 1; - list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); - cfs_waitq_signal (&sched->kss_waitq); - - spin_unlock_bh (&sched->kss_lock); - nenomem_conns++; - } - - /* careful with the jiffy wrap... */ - while ((timeout = cfs_time_sub(deadline, - cfs_time_current())) <= 0) { - const int n = 4; - const int p = 1; - int chunk = ksocknal_data.ksnd_peer_hash_size; - - /* Time to check for timeouts on a few more peers: I do - * checks every 'p' seconds on a proportion of the peer - * table and I need to check every connection 'n' times - * within a timeout interval, to ensure I detect a - * timeout on any connection within (n+1)/n times the - * timeout interval. */ - - if (*ksocknal_tunables.ksnd_timeout > n * p) - chunk = (chunk * n * p) / - *ksocknal_tunables.ksnd_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - ksocknal_check_peer_timeouts (peer_index); - peer_index = (peer_index + 1) % - ksocknal_data.ksnd_peer_hash_size; - } - - deadline = cfs_time_add(deadline, cfs_time_seconds(p)); - } - - if (nenomem_conns != 0) { - /* Reduce my timeout if I rescheduled ENOMEM conns. - * This also prevents me getting woken immediately - * if any go back on my enomem list. 
*/ - timeout = SOCKNAL_ENOMEM_RETRY; - } - ksocknal_data.ksnd_reaper_waketime = - cfs_time_add(cfs_time_current(), timeout); - - set_current_state (TASK_INTERRUPTIBLE); - cfs_waitq_add (&ksocknal_data.ksnd_reaper_waitq, &wait); - - if (!ksocknal_data.ksnd_shuttingdown && - list_empty (&ksocknal_data.ksnd_deathrow_conns) && - list_empty (&ksocknal_data.ksnd_zombie_conns)) - cfs_waitq_timedwait (&wait, CFS_TASK_INTERRUPTIBLE, timeout); - - set_current_state (TASK_RUNNING); - cfs_waitq_del (&ksocknal_data.ksnd_reaper_waitq, &wait); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - } - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - ksocknal_thread_fini (); - return (0); -} diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.c b/lnet/klnds/socklnd/socklnd_lib-darwin.c deleted file mode 100644 index 25d6b453197e877ee1662d85127a7636c531c333..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.c +++ /dev/null @@ -1,1072 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#include <mach/mach_types.h> -#include <string.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <sys/file.h> - -#include "socklnd.h" - -# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - -SYSCTL_DECL(_lnet); - -SYSCTL_NODE (_lnet, OID_AUTO, ksocknal, CTLFLAG_RW, - 0, "ksocknal_sysctl"); - -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, timeout, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_timeout, - 0, "timeout"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits, - 0, "credits"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peercredits, - 0, "peer_credits"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds, - 0, "nconnds"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_reconnectms, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_reconnectms, - 0, "min_reconnectms"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, max_reconnectms, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_max_reconnectms, - 0, "max_reconnectms"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, eager_ack, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack, - 0, "eager_ack"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, typed, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns, - 0, "typed"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_bulk, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk, - 0, "min_bulk"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, rx_buffer_size, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_rx_buffer_size, - 0, "rx_buffer_size"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, tx_buffer_size, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_tx_buffer_size, - 0, "tx_buffer_size"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nagle, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle, - 0, "nagle"); 
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_idle, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_idle, - 0, "keepalive_idle"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_count, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_count, - 0, "keepalive_count"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_intvl, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_intvl, - 0, "keepalive_intvl"); - -cfs_sysctl_table_t ksocknal_top_ctl_table [] = { - &sysctl__lnet_ksocknal, - &sysctl__lnet_ksocknal_timeout, - &sysctl__lnet_ksocknal_credits, - &sysctl__lnet_ksocknal_peer_credits, - &sysctl__lnet_ksocknal_nconnds, - &sysctl__lnet_ksocknal_min_reconnectms, - &sysctl__lnet_ksocknal_max_reconnectms, - &sysctl__lnet_ksocknal_eager_ack, - &sysctl__lnet_ksocknal_typed, - &sysctl__lnet_ksocknal_min_bulk, - &sysctl__lnet_ksocknal_rx_buffer_size, - &sysctl__lnet_ksocknal_tx_buffer_size, - &sysctl__lnet_ksocknal_nagle, - &sysctl__lnet_ksocknal_keepalive_idle, - &sysctl__lnet_ksocknal_keepalive_count, - &sysctl__lnet_ksocknal_keepalive_intvl, - NULL -}; - -int -ksocknal_lib_tunables_init () -{ - ksocknal_tunables.ksnd_sysctl = - cfs_register_sysctl_table (ksocknal_top_ctl_table, 0); - - if (ksocknal_tunables.ksnd_sysctl == NULL) - return -ENOMEM; - - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ - if (ksocknal_tunables.ksnd_sysctl != NULL) - cfs_unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); -} -#else -int -ksocknal_lib_tunables_init () -{ - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ -} -#endif - -/* - * To use bigger buffer for socket: - * 1. Increase nmbclusters (Cannot increased by sysctl because it's ready only, so - * we must patch kernel). - * 2. Increase net.inet.tcp.reass.maxsegments - * 3. Increase net.inet.tcp.sendspace - * 4. Increase net.inet.tcp.recvspace - * 5. 
Increase kern.ipc.maxsockbuf - */ -#define KSOCKNAL_MAX_BUFFER (1152*1024) - -void -ksocknal_lib_bind_irq (unsigned int irq) -{ - return; -} - -unsigned int -ksocknal_lib_sock_irq (cfs_socket_t *sock) -{ - return 0; -} - -int -ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) -{ - int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1, - &conn->ksnc_ipaddr, - &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ - LASSERT (!conn->ksnc_closing); - - if (rc != 0) { - CERROR ("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = libcfs_sock_getaddr(conn->ksnc_sock, 0, - &conn->ksnc_myipaddr, NULL); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -#ifdef __DARWIN8__ - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - socket_t sock = C2B_SOCK(conn->ksnc_sock); - size_t sndlen; - int nob; - int rc; - -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_niov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } - - /* - * XXX Liang: - * Linux has MSG_MORE, do we have anything to - * reduce number of partial TCP segments sent? 
- */ - rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen); - if (rc == 0) - rc = sndlen; - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - socket_t sock = C2B_SOCK(conn->ksnc_sock); - lnet_kiov_t *kiov = tx->tx_kiov; - int rc; - int nob; - size_t sndlen; - -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_nkiov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - - /* - * XXX Liang: - * Linux has MSG_MORE, do wen have anyting to - * reduce number of partial TCP segments sent? 
- */ - rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen); - for (i = 0; i < niov; i++) - cfs_kunmap(kiov[i].kiov_page); - if (rc == 0) - rc = sndlen; - return rc; -} - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_niov; -#endif - struct iovec *iov = conn->ksnc_rx_iov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - size_t rcvlen; - int nob; - int i; - int rc; - - LASSERT (niov > 0); - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen); - if (rc == 0) - rc = rcvlen; - - return rc; -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_nkiov; -#endif - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - int nob; - int i; - size_t rcvlen; - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + \ - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen); - for (i = 0; i < niov; i++) - cfs_kunmap(kiov[i].kiov_page); - if (rc == 0) - rc = rcvlen; - return (rc); -} - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - /* XXX Liang: */ -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) -{ - socket_t sock = C2B_SOCK(conn->ksnc_sock); - int len; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return (-ESHUTDOWN); - } - rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem); - if (rc == 0) { - len = sizeof(*nagle); - rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - nagle, &len); - } - ksocknal_connsock_decref(conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return (rc); -} - -int -ksocknal_lib_setup_sock (cfs_socket_t *sock) -{ - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - socket_t so = C2B_SOCK(sock); - struct linger linger; - - /* Ensure this socket aborts active sends immediately when we close - * it. 
*/ - linger.l_onoff = 0; - linger.l_linger = 0; - rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger)); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - return (rc); - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - rc = libcfs_sock_setbuf(sock, - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { - CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); - return (rc); - } - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - option = (do_keepalive ? 
1 : 0); - - rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - return (rc); - } - - if (!do_keepalive) - return (rc); - rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE, - &keep_idle, sizeof(keep_idle)); - - return (rc); -} - -void -ksocknal_lib_push_conn(ksock_conn_t *conn) -{ - socket_t sock; - int val = 1; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ - return; - sock = C2B_SOCK(conn->ksnc_sock); - - rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); - LASSERT(rc == 0); - - ksocknal_connsock_decref(conn); - return; -} - -extern void ksocknal_read_callback (ksock_conn_t *conn); -extern void ksocknal_write_callback (ksock_conn_t *conn); - -static void -ksocknal_upcall(socket_t so, void *arg, int waitf) -{ - ksock_conn_t *conn = (ksock_conn_t *)arg; - ENTRY; - - read_lock (&ksocknal_data.ksnd_global_lock); - if (conn == NULL) - goto out; - - ksocknal_read_callback (conn); - /* XXX Liang */ - ksocknal_write_callback (conn); -out: - read_unlock (&ksocknal_data.ksnd_global_lock); - EXIT; -} - -void -ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn) -{ - /* No callback need to save in osx */ - return; -} - -void -ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn) -{ - libcfs_sock_set_cb(sock, ksocknal_upcall, (void *)conn); - return; -} - -void -ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn) -{ - libcfs_sock_reset_cb(sock); -} - -#else /* !__DARWIN8__ */ - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_niov; -#endif - struct socket *sock = conn->ksnc_sock; - int nob; - int rc; - int i; - struct uio suio = { - .uio_iov = scratchiov, - 
.uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, /* This will be valued after a while */ - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_WRITE, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } - suio.uio_resid = nob; - - CFS_NET_IN; - rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags); - CFS_NET_EX; - - /* NB there is no return value can indicate how many - * have been sent and how many resid, we have to get - * sent bytes from suio. */ - if (rc != 0) { - if (suio.uio_resid != nob &&\ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) - /* We have sent something */ - rc = nob - suio.uio_resid; - else if ( rc == EWOULDBLOCK ) - /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */ - rc = -EAGAIN; - else - rc = -rc; - } else /* rc == 0 */ - rc = nob - suio.uio_resid; - - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ -#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_nkiov; -#endif - struct socket *sock = conn->ksnc_sock; - lnet_kiov_t *kiov = tx->tx_kiov; - int nob; - int rc; - int i; - struct uio suio = { - .uio_iov = scratchiov, - .uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, /* It should be valued after a while */ - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_WRITE, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - suio.uio_resid = nob; - - CFS_NET_IN; - rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags); - CFS_NET_EX; - - for (i = 0; i < 
niov; i++) - cfs_kunmap(kiov[i].kiov_page); - - if (rc != 0) { - if (suio.uio_resid != nob &&\ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) - /* We have sent something */ - rc = nob - suio.uio_resid; - else if ( rc == EWOULDBLOCK ) - /* EAGAIN and EWOULD BLOCK have same value in OSX */ - rc = -EAGAIN; - else - rc = -rc; - } else /* rc == 0 */ - rc = nob - suio.uio_resid; - - return rc; -} - -/* - * liang: Hack of inpcb and tcpcb. - * To get tcpcb of a socket, and call tcp_output - * to send quick ack. - */ -struct ks_tseg_qent{ - int foo; -}; - -struct ks_tcptemp{ - int foo; -}; - -LIST_HEAD(ks_tsegqe_head, ks_tseg_qent); - -struct ks_tcpcb { - struct ks_tsegqe_head t_segq; - int t_dupacks; - struct ks_tcptemp *unused; - int t_timer[4]; - struct inpcb *t_inpcb; - int t_state; - u_int t_flags; - /* - * There are more fields but we dont need - * ...... - */ -}; - -#define TF_ACKNOW 0x00001 -#define TF_DELACK 0x00002 - -struct ks_inpcb { - LIST_ENTRY(ks_inpcb) inp_hash; - struct in_addr reserved1; - struct in_addr reserved2; - u_short inp_fport; - u_short inp_lport; - LIST_ENTRY(inpcb) inp_list; - caddr_t inp_ppcb; - /* - * There are more fields but we dont need - * ...... - */ -}; - -#define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb) -#define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb) -#define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so))) - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - struct socket *sock = conn->ksnc_sock; - struct ks_inpcb *inp = ks_sotoinpcb(sock); - struct ks_tcpcb *tp = ks_intotcpcb(inp); - int s; - CFS_DECL_NET_DATA; - - extern int tcp_output(register struct ks_tcpcb *tp); - - CFS_NET_IN; - s = splnet(); - - /* - * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo - * to send immediate ACK. 
- */ - if (tp && tp->t_flags & TF_DELACK){ - tp->t_flags &= ~TF_DELACK; - tp->t_flags |= TF_ACKNOW; - (void) tcp_output(tp); - } - splx(s); - - CFS_NET_EX; - - return; -} - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_niov; -#endif - struct iovec *iov = conn->ksnc_rx_iov; - int nob; - int rc; - int i; - struct uio ruio = { - .uio_iov = scratchiov, - .uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, /* It should be valued after a while */ - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_READ, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - ruio.uio_resid = nob; - - CFS_NET_IN; - rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags); - CFS_NET_EX; - if (rc){ - if (ruio.uio_resid != nob && \ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN)) - /* data particially received */ - rc = nob - ruio.uio_resid; - else if (rc == EWOULDBLOCK) - /* EAGAIN and EWOULD BLOCK have same value in OSX */ - rc = -EAGAIN; - else - rc = -rc; - } else - rc = nob - ruio.uio_resid; - - return (rc); -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_nkiov; -#endif - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - int nob; - int rc; - int i; - struct uio ruio = { - .uio_iov = scratchiov, - .uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, - .uio_segflg = UIO_SYSSPACE, - 
.uio_rw = UIO_READ, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - ruio.uio_resid = nob; - - CFS_NET_IN; - rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags); - CFS_NET_EX; - - for (i = 0; i < niov; i++) - cfs_kunmap(kiov[i].kiov_page); - - if (rc){ - if (ruio.uio_resid != nob && \ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) - /* data particially received */ - rc = nob - ruio.uio_resid; - else if (rc == EWOULDBLOCK) - /* receive blocked, EWOULDBLOCK == EAGAIN */ - rc = -EAGAIN; - else - rc = -rc; - } else - rc = nob - ruio.uio_resid; - - return (rc); -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) -{ - struct socket *sock = conn->ksnc_sock; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return -ESHUTDOWN; - } - rc = libcfs_sock_getbuf(sock, txmem, rxmem); - if (rc == 0) { - struct sockopt sopt; - int len; - CFS_DECL_NET_DATA; - - len = sizeof(*nagle); - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_GET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = nagle; - sopt.sopt_valsize = len; - - CFS_NET_IN; - rc = -sogetopt(sock, &sopt); - CFS_NET_EX; - } - - ksocknal_connsock_decref(conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - return (rc); -} - -int -ksocknal_lib_setup_sock (struct socket *so) -{ - struct sockopt sopt; - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - CFS_DECL_NET_DATA; - - rc = libcfs_sock_setbuf(so, - *ksocknal_tunables.ksnd_tx_buffer_size, - 
*ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { - CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); - return (rc); - } - - /* Ensure this socket aborts active sends immediately when we close - * it. */ - bzero(&sopt, sizeof sopt); - - linger.l_onoff = 0; - linger.l_linger = 0; - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_LINGER; - sopt.sopt_val = &linger; - sopt.sopt_valsize = sizeof(linger); - - CFS_NET_IN; - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - goto out; - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - goto out; - } - } - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - option = (do_keepalive ? 
1 : 0); - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_KEEPALIVE; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - goto out; - } - - if (!do_keepalive) { - /* no more setting, just return */ - rc = 0; - goto out; - } - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_KEEPALIVE; - sopt.sopt_val = &keep_idle; - sopt.sopt_valsize = sizeof(keep_idle); - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc); - goto out; - } -out: - CFS_NET_EX; - return (rc); -} - -void -ksocknal_lib_push_conn(ksock_conn_t *conn) -{ - struct socket *sock; - struct sockopt sopt; - int val = 1; - int rc; - CFS_DECL_NET_DATA; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ - return; - sock = conn->ksnc_sock; - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - - CFS_NET_IN; - sosetopt(sock, &sopt); - CFS_NET_EX; - - ksocknal_connsock_decref(conn); - return; -} - - -extern void ksocknal_read_callback (ksock_conn_t *conn); -extern void ksocknal_write_callback (ksock_conn_t *conn); - -static void -ksocknal_upcall(struct socket *so, caddr_t arg, int waitf) -{ - ksock_conn_t *conn = (ksock_conn_t *)arg; - ENTRY; - - read_lock (&ksocknal_data.ksnd_global_lock); - if (conn == NULL) - goto out; - - if (so->so_rcv.sb_flags & SB_UPCALL) { - extern int soreadable(struct socket *so); - if (conn->ksnc_rx_nob_wanted && soreadable(so)) - /* To verify whether the upcall is for receive */ - ksocknal_read_callback (conn); - } - /* go foward? 
*/ - if (so->so_snd.sb_flags & SB_UPCALL){ - extern int sowriteable(struct socket *so); - if (sowriteable(so)) - /* socket is writable */ - ksocknal_write_callback(conn); - } -out: - read_unlock (&ksocknal_data.ksnd_global_lock); - - EXIT; -} - -void -ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) -{ - /* No callback need to save in osx */ - return; -} - -void -ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) -{ - CFS_DECL_NET_DATA; - - CFS_NET_IN; - sock->so_upcallarg = (void *)conn; - sock->so_upcall = ksocknal_upcall; - sock->so_snd.sb_timeo = 0; - sock->so_rcv.sb_timeo = cfs_time_seconds(2); - sock->so_rcv.sb_flags |= SB_UPCALL; - sock->so_snd.sb_flags |= SB_UPCALL; - CFS_NET_EX; - return; -} - -void -ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn) -{ - CFS_DECL_NET_DATA; - - CFS_NET_IN; - ksocknal_upcall (sock, (void *)conn, 0); - CFS_NET_EX; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) -{ - CFS_DECL_NET_DATA; - - CFS_NET_IN; - sock->so_rcv.sb_flags &= ~SB_UPCALL; - sock->so_snd.sb_flags &= ~SB_UPCALL; - sock->so_upcall = NULL; - sock->so_upcallarg = NULL; - CFS_NET_EX; -} - -#endif /* !__DARWIN8__ */ diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.h b/lnet/klnds/socklnd/socklnd_lib-darwin.h deleted file mode 100644 index 9e7574ac807c194eb9ff5b62a9328c1ae8f274d8..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __XNU_SOCKNAL_LIB_H__ -#define __XNU_SOCKNAL_LIB_H__ - -#include <sys/kernel.h> -#include <sys/file.h> -#include <sys/filedesc.h> -#include <sys/stat.h> -#include <sys/vnode.h> -#include <sys/mount.h> -#include <sys/proc.h> -#include <sys/sysctl.h> -#include <sys/ubc.h> -#include <sys/uio.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/domain.h> -#include <sys/protosw.h> -#include <sys/namei.h> -#include 
<sys/fcntl.h> -#include <sys/lockf.h> -#include <sys/syslog.h> -#include <machine/spl.h> -#include <mach/mach_types.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <stdarg.h> - -#include <libcfs/libcfs.h> - -static inline -int ksocknal_nsched(void) -{ - /* XXX Liang: fix it */ - return 1; -} - -#endif diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c deleted file mode 100644 index 7707fd573dd4efe7b4c846381247400e8be19e6b..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-linux.c +++ /dev/null @@ -1,999 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#include "socklnd.h" - -# if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM -static cfs_sysctl_table_t ksocknal_ctl_table[21]; - -cfs_sysctl_table_t ksocknal_top_ctl_table[] = { - { - .ctl_name = 200, - .procname = "socknal", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ksocknal_ctl_table - }, - { 0 } -}; - -int -ksocknal_lib_tunables_init () -{ - int i = 0; - int j = 1; - - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "timeout", - .data = ksocknal_tunables.ksnd_timeout, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "credits", - .data = ksocknal_tunables.ksnd_credits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "peer_credits", - .data = ksocknal_tunables.ksnd_peercredits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "nconnds", - .data = ksocknal_tunables.ksnd_nconnds, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - 
ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "min_reconnectms", - .data = ksocknal_tunables.ksnd_min_reconnectms, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "max_reconnectms", - .data = ksocknal_tunables.ksnd_max_reconnectms, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "eager_ack", - .data = ksocknal_tunables.ksnd_eager_ack, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "zero_copy", - .data = ksocknal_tunables.ksnd_zc_min_frag, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "typed", - .data = ksocknal_tunables.ksnd_typed_conns, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "min_bulk", - .data = ksocknal_tunables.ksnd_min_bulk, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "rx_buffer_size", - .data = ksocknal_tunables.ksnd_rx_buffer_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "tx_buffer_size", - .data = ksocknal_tunables.ksnd_tx_buffer_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "nagle", - .data = ksocknal_tunables.ksnd_nagle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = 
&proc_dointvec - }; -#ifdef CPU_AFFINITY - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "irq_affinity", - .data = ksocknal_tunables.ksnd_irq_affinity, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; -#endif - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "keepalive_idle", - .data = ksocknal_tunables.ksnd_keepalive_idle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "keepalive_count", - .data = ksocknal_tunables.ksnd_keepalive_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "keepalive_intvl", - .data = ksocknal_tunables.ksnd_keepalive_intvl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; -#ifdef SOCKNAL_BACKOFF - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "backoff_init", - .data = ksocknal_tunables.ksnd_backoff_init, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "backoff_max", - .data = ksocknal_tunables.ksnd_backoff_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; -#endif -#if SOCKNAL_VERSION_DEBUG - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { - .ctl_name = j++, - .procname = "protocol", - .data = ksocknal_tunables.ksnd_protocol, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; -#endif - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { 0 }; - - LASSERT (j == i); - LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0])); - - ksocknal_tunables.ksnd_sysctl = - cfs_register_sysctl_table(ksocknal_top_ctl_table, 0); - - if (ksocknal_tunables.ksnd_sysctl == NULL) - 
CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ - if (ksocknal_tunables.ksnd_sysctl != NULL) - cfs_unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl); -} -#else -int -ksocknal_lib_tunables_init () -{ - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ -} -#endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */ - -void -ksocknal_lib_bind_irq (unsigned int irq) -{ -#if (defined(CONFIG_SMP) && defined(CPU_AFFINITY)) - int bind; - int cpu; - char cmdline[64]; - ksock_irqinfo_t *info; - char *argv[] = {"/bin/sh", - "-c", - cmdline, - NULL}; - char *envp[] = {"HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - LASSERT (irq < NR_IRQS); - if (irq == 0) /* software NIC or affinity disabled */ - return; - - info = &ksocknal_data.ksnd_irqinfo[irq]; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - LASSERT (info->ksni_valid); - bind = !info->ksni_bound; - info->ksni_bound = 1; - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - if (!bind) /* bound already */ - return; - - cpu = ksocknal_irqsched2cpu(info->ksni_sched); - snprintf (cmdline, sizeof (cmdline), - "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq); - - LCONSOLE_INFO("Binding irq %u to CPU %d with cmd: %s\n", - irq, cpu, cmdline); - - /* FIXME: Find a better method of setting IRQ affinity... - */ - - USERMODEHELPER(argv[0], argv, envp); -#endif -} - -int -ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) -{ - int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1, - &conn->ksnc_ipaddr, - &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... 
*/ - LASSERT (!conn->ksnc_closing); - - if (rc != 0) { - CERROR ("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = libcfs_sock_getaddr(conn->ksnc_sock, 0, - &conn->ksnc_myipaddr, NULL); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -unsigned int -ksocknal_lib_sock_irq (struct socket *sock) -{ - int irq = 0; -#ifdef CPU_AFFINITY - struct dst_entry *dst; - - if (!*ksocknal_tunables.ksnd_irq_affinity) - return 0; - - dst = sk_dst_get (sock->sk); - if (dst != NULL) { - if (dst->dev != NULL) { - irq = dst->dev->irq; - if (irq >= NR_IRQS) { - CERROR ("Unexpected IRQ %x\n", irq); - irq = 0; - } - } - dst_release (dst); - } - -#endif - return irq; -} - -int -ksocknal_lib_zc_capable(struct socket *sock) -{ - int caps = sock->sk->sk_route_caps; - - /* ZC if the socket supports scatter/gather and doesn't need software - * checksums */ - return ((caps & NETIF_F_SG) != 0 && - (caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) != 0); -} - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; - int nob; - int rc; - - if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ - conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ - tx->tx_nob == tx->tx_resid && /* frist sending */ - tx->tx_msg.ksm_csum == 0) /* not checksummed */ - ksocknal_lib_csum_tx(tx); - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - - { -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_niov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - set_fs (KERNEL_DS); - rc = sock_sendmsg(sock, &msg, nob); - set_fs (oldmm); - } - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; - lnet_kiov_t *kiov = tx->tx_kiov; - int rc; - int nob; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - - if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag && - tx->tx_msg.ksm_zc_req_cookie != 0) { - /* Zero copy is enabled */ - struct sock *sk = sock->sk; - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->kiov_len); - - if (!list_empty(&conn->ksnc_tx_queue) || - fragsize < tx->tx_resid) - msgflg |= MSG_MORE; - - if (sk->sk_prot->sendpage != NULL) { - rc = sk->sk_prot->sendpage(sk, page, - offset, fragsize, msgflg); - } else { - rc = tcp_sendpage(sock, page, offset, fragsize, msgflg); - } - } else { -#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." 
-#endif - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_nkiov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - set_fs (KERNEL_DS); - rc = sock_sendmsg(sock, &msg, nob); - set_fs (oldmm); - - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); - } - return rc; -} - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - int opt = 1; - mm_segment_t oldmm = get_fs(); - struct socket *sock = conn->ksnc_sock; - - /* Remind the socket to ACK eagerly. If I don't, the socket might - * think I'm about to send something it could piggy-back the ACK - * on, introducing delay in completing zero-copy sends in my - * peer. */ - - set_fs(KERNEL_DS); - sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK, - (char *)&opt, sizeof (opt)); - set_fs(oldmm); -} - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_niov; -#endif - struct iovec *iov = conn->ksnc_rx_iov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int nob; - int i; - int rc; - int fragnob; - int sum; - __u32 saved_csum; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - LASSERT (niov > 0); - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); - /* NB this is just a boolean..........................^ */ - set_fs (oldmm); - - saved_csum = 0; - if (conn->ksnc_proto == &ksocknal_protocol_v2x) { - saved_csum = conn->ksnc_msg.ksm_csum; - conn->ksnc_msg.ksm_csum = 0; - } - - if (saved_csum != 0) { - /* accumulate checksum */ - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT (i < niov); - - fragnob = iov[i].iov_len; - if (fragnob > sum) - fragnob = sum; - - conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, - iov[i].iov_base, fragnob); - } - conn->ksnc_msg.ksm_csum = saved_csum; - } - - return rc; -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." -#endif - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_nkiov; -#endif - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int nob; - int i; - int rc; - void *base; - int sum; - int fragnob; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); - /* NB this is just a boolean.......................^ */ - set_fs (oldmm); - - if (conn->ksnc_msg.ksm_csum != 0) { - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT (i < niov); - - /* Dang! have to kmap again because I have nowhere to stash the - * mapped address. But by doing it while the page is still - * mapped, the kernel just bumps the map count and returns me - * the address it stashed. */ - base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - fragnob = kiov[i].kiov_len; - if (fragnob > sum) - fragnob = sum; - - conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, - base, fragnob); - - kunmap(kiov[i].kiov_page); - } - } - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); - - return (rc); -} - -void -ksocknal_lib_csum_tx(ksock_tx_t *tx) -{ - int i; - __u32 csum; - void *base; - - LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg); - LASSERT(tx->tx_conn != NULL); - LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); - - tx->tx_msg.ksm_csum = 0; - - csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base, - tx->tx_iov[0].iov_len); - - if (tx->tx_kiov != NULL) { - for (i = 0; i < tx->tx_nkiov; i++) { - base = kmap(tx->tx_kiov[i].kiov_page) + - tx->tx_kiov[i].kiov_offset; - - csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len); - - kunmap(tx->tx_kiov[i].kiov_page); - } - } else { - for (i = 1; i < tx->tx_niov; i++) - csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base, - tx->tx_iov[i].iov_len); - } - - if (*ksocknal_tunables.ksnd_inject_csum_error) { - csum++; - *ksocknal_tunables.ksnd_inject_csum_error = 0; - } - - tx->tx_msg.ksm_csum = csum; -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int 
*rxmem, int *nagle) -{ - mm_segment_t oldmm = get_fs (); - struct socket *sock = conn->ksnc_sock; - int len; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return (-ESHUTDOWN); - } - - rc = libcfs_sock_getbuf(sock, txmem, rxmem); - if (rc == 0) { - len = sizeof(*nagle); - set_fs(KERNEL_DS); - rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); - set_fs(oldmm); - } - - ksocknal_connsock_decref(conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return (rc); -} - -int -ksocknal_lib_setup_sock (struct socket *sock) -{ - mm_segment_t oldmm = get_fs (); - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - - sock->sk->sk_allocation = GFP_NOFS; - - /* Ensure this socket aborts active sends immediately when we close - * it. */ - - linger.l_onoff = 0; - linger.l_linger = 0; - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER, - (char *)&linger, sizeof (linger)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - return (rc); - } - - option = -1; - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_LINGER2: %d\n", rc); - return (rc); - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - rc = libcfs_sock_setbuf(sock, - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { - CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); 
- return (rc); - } - -/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */ -#ifdef SOCKNAL_BACKOFF - if (*ksocknal_tunables.ksnd_backoff_init > 0) { - option = *ksocknal_tunables.ksnd_backoff_init; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_INIT, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set initial tcp backoff %d: %d\n", - option, rc); - return (rc); - } - } - - if (*ksocknal_tunables.ksnd_backoff_max > 0) { - option = *ksocknal_tunables.ksnd_backoff_max; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_MAX, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set maximum tcp backoff %d: %d\n", - option, rc); - return (rc); - } - } -#endif - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (do_keepalive ? 
1 : 0); - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - return (rc); - } - - if (!do_keepalive) - return (0); - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keep_idle, sizeof (keep_idle)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof (keep_intvl)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keep_count, sizeof (keep_count)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPCNT: %d\n", rc); - return (rc); - } - - return (0); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - return &(sk->tp_pinfo.af_tcp); -} -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)) -#define sock2tcp_opt(sk) tcp_sk(sk) -#else -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - struct tcp_sock *s = (struct tcp_sock *)sk; - return &s->tcp; -} -#endif - -void -ksocknal_lib_push_conn (ksock_conn_t *conn) -{ - struct sock *sk; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)) - struct tcp_opt *tp; -#else - struct tcp_sock *tp; -#endif - int nonagle; - int val = 1; - int rc; - mm_segment_t oldmm; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ - return; - - sk = conn->ksnc_sock->sk; - tp = sock2tcp_opt(sk); - - lock_sock (sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock (sk); - - oldmm = get_fs (); - set_fs (KERNEL_DS); - - rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof (val)); - LASSERT (rc == 0); - - set_fs (oldmm); - - lock_sock (sk); 
- tp->nonagle = nonagle; - release_sock (sk); - - ksocknal_connsock_decref(conn); -} - -extern void ksocknal_read_callback (ksock_conn_t *conn); -extern void ksocknal_write_callback (ksock_conn_t *conn); -/* - * socket call back in Linux - */ -static void -ksocknal_data_ready (struct sock *sk, int n) -{ - ksock_conn_t *conn; - ENTRY; - - /* interleave correctly with closing sockets... */ - LASSERT(!in_irq()); - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ - LASSERT (sk->sk_data_ready != &ksocknal_data_ready); - sk->sk_data_ready (sk, n); - } else - ksocknal_read_callback(conn); - - read_unlock (&ksocknal_data.ksnd_global_lock); - - EXIT; -} - -static void -ksocknal_write_space (struct sock *sk) -{ - ksock_conn_t *conn; - int wspace; - int min_wpace; - - /* interleave correctly with closing sockets... */ - LASSERT(!in_irq()); - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - wspace = SOCKNAL_WSPACE(sk); - min_wpace = SOCKNAL_MIN_WSPACE(sk); - - CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", - sk, wspace, min_wpace, conn, - (conn == NULL) ? "" : (conn->ksnc_tx_ready ? - " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? - " scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? - " empty" : " queued")); - - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ - LASSERT (sk->sk_write_space != &ksocknal_write_space); - sk->sk_write_space (sk); - - read_unlock (&ksocknal_data.ksnd_global_lock); - return; - } - - if (wspace >= min_wpace) { /* got enough space */ - ksocknal_write_callback(conn); - - /* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the - * ENOMEM check in ksocknal_transmit is race-free (think about - * it). 
*/ - - clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags); - } - - read_unlock (&ksocknal_data.ksnd_global_lock); -} - -void -ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) -{ - conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; - conn->ksnc_saved_write_space = sock->sk->sk_write_space; -} - -void -ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) -{ - sock->sk->sk_user_data = conn; - sock->sk->sk_data_ready = ksocknal_data_ready; - sock->sk->sk_write_space = ksocknal_write_space; - return; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) -{ - /* Remove conn's network callbacks. - * NB I _have_ to restore the callback, rather than storing a noop, - * since the socket could survive past this module being unloaded!! */ - sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; - sock->sk->sk_write_space = conn->ksnc_saved_write_space; - - /* A callback could be in progress already; they hold a read lock - * on ksnd_global_lock (to serialise with me) and NOOP if - * sk_user_data is NULL. 
*/ - sock->sk->sk_user_data = NULL; - - return ; -} - diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.h b/lnet/klnds/socklnd/socklnd_lib-linux.h deleted file mode 100644 index 39aab81556159f241bf0b19cdcec83c4d6fbc8f7..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-linux.h +++ /dev/null @@ -1,121 +0,0 @@ -#define DEBUG_PORTAL_ALLOC -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifndef __LINUX_SOCKNAL_LIB_H__ -#define __LINUX_SOCKNAL_LIB_H__ - -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/version.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <net/tcp.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/irq.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <asm/uaccess.h> -#include <asm/div64.h> - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -# include <linux/syscalls.h> -#endif - -#include <libcfs/kp30.h> -#include <libcfs/linux/portals_compat25.h> - -#include <linux/crc32.h> -static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len) -{ -#if 1 - return crc32_le(crc, p, len); -#else - while (len-- > 0) - crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ; - return crc; -#endif -} - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,7)) -# define SOCKNAL_WSPACE(sk) sk_stream_wspace(sk) -# define SOCKNAL_MIN_WSPACE(sk) sk_stream_min_wspace(sk) -#else -# define SOCKNAL_WSPACE(sk) tcp_wspace(sk) -# define SOCKNAL_MIN_WSPACE(sk) (((sk)->sk_sndbuf*8)/10) -#endif - -#ifndef CONFIG_SMP -static inline -int ksocknal_nsched(void) -{ - return 1; -} -#else -#include <linux/lustre_version.h> -# if 
!(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT)) -static inline int -ksocknal_nsched(void) -{ - return num_online_cpus(); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - return i; -} - -static inline int -ksocknal_irqsched2cpu(int i) -{ - return i; -} -# else -static inline int -ksocknal_nsched(void) -{ - if (smp_num_siblings == 1) - return (num_online_cpus()); - - /* We need to know if this assumption is crap */ - LASSERT (smp_num_siblings == 2); - return (num_online_cpus()/2); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - if (smp_num_siblings == 1) - return i; - - return (i * 2); -} - -static inline int -ksocknal_irqsched2cpu(int i) -{ - return (ksocknal_sched2cpu(i) + 1); -} -# endif -#endif - -#endif diff --git a/lnet/klnds/socklnd/socklnd_lib-winnt.c b/lnet/klnds/socklnd/socklnd_lib-winnt.c deleted file mode 100755 index f0366d886d01b6c06019cc420ba119cb3cd12e95..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-winnt.c +++ /dev/null @@ -1,833 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc, All rights reserved. - * Author: Matt Wu - * - * This file is part of Lustre, http://www.lustre.org. - * - * This Lustre Software is proprietary - please refer to the license - * agreement you received with your software. 
- * - * windows socknal library - * - */ - -#include "socklnd.h" - -# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static ctl_table ksocknal_ctl_table[18]; - -ctl_table ksocknal_top_ctl_table[] = { - {200, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, - { 0 } -}; - -int -ksocknal_lib_tunables_init () -{ - int i = 0; - int j = 1; - - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "timeout", ksocknal_tunables.ksnd_timeout, - sizeof (int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "credits", ksocknal_tunables.ksnd_credits, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "peer_credits", ksocknal_tunables.ksnd_peercredits, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "nconnds", ksocknal_tunables.ksnd_nconnds, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "min_reconnectms", ksocknal_tunables.ksnd_min_reconnectms, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "max_reconnectms", ksocknal_tunables.ksnd_max_reconnectms, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "eager_ack", ksocknal_tunables.ksnd_eager_ack, - sizeof (int), 0644, NULL, &proc_dointvec}; -#if SOCKNAL_ZC - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "zero_copy", ksocknal_tunables.ksnd_zc_min_frag, - sizeof (int), 0644, NULL, &proc_dointvec}; -#endif - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "typed", ksocknal_tunables.ksnd_typed_conns, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk, - sizeof (int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "buffer_size", ksocknal_tunables.ksnd_buffer_size, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "nagle", 
ksocknal_tunables.ksnd_nagle, - sizeof(int), 0644, NULL, &proc_dointvec}; -#ifdef CPU_AFFINITY - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "irq_affinity", ksocknal_tunables.ksnd_irq_affinity, - sizeof(int), 0644, NULL, &proc_dointvec}; -#endif - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_idle", ksocknal_tunables.ksnd_keepalive_idle, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_count", ksocknal_tunables.ksnd_keepalive_count, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_intvl", ksocknal_tunables.ksnd_keepalive_intvl, - sizeof(int), 0644, NULL, &proc_dointvec}; - - LASSERT (j == i+1); - LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0])); - - ksocknal_tunables.ksnd_sysctl = - register_sysctl_table(ksocknal_top_ctl_table, 0); - - if (ksocknal_tunables.ksnd_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ - if (ksocknal_tunables.ksnd_sysctl != NULL) - unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl); -} -#else -int -ksocknal_lib_tunables_init () -{ - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ -} -#endif - -void -ksocknal_lib_bind_irq (unsigned int irq) -{ -} - -int -ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) -{ - int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1, - &conn->ksnc_ipaddr, &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... 
*/ - LASSERT (!conn->ksnc_closing); - - if (rc != 0) { - CERROR ("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = libcfs_sock_getaddr(conn->ksnc_sock, 0, - &conn->ksnc_myipaddr, NULL); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -unsigned int -ksocknal_lib_sock_irq (struct socket *sock) -{ - return 0; -} - -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) -static struct page * -ksocknal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) - page = vmalloc_to_page ((void *)vaddr); -#ifdef CONFIG_HIGHMEM - else if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) - page = vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ -#endif - else - page = virt_to_page (vaddr); - - if (page == NULL || - !VALID_PAGE (page)) - return (NULL); - - return (page); -} -#endif - -/* - * ks_lock_iovs - * Lock the i/o vector buffers into MDL structure - * - * Arguments: - * iov: the array of i/o vectors - * niov: number of i/o vectors to be locked - * len: the real length of the iov vectors - * - * Return Value: - * ksock_mdl_t *: the Mdl of the locked buffers or - * NULL pointer in failure case - * - * Notes: - * N/A - */ - -ksock_mdl_t * -ks_lock_iovs( - IN struct iovec *iov, - IN int niov, - IN int recving, - IN int * len ) -{ - int rc = 0; - - int i = 0; - int total = 0; - ksock_mdl_t * mdl = NULL; - ksock_mdl_t * tail = NULL; - - LASSERT(iov != NULL); - LASSERT(niov > 0); - LASSERT(len != NULL); - - for (i=0; i < niov; i++) { - - ksock_mdl_t * Iovec = NULL; - - rc = ks_lock_buffer( - iov[i].iov_base, - FALSE, - iov[i].iov_len, - recving ? 
IoWriteAccess : IoReadAccess, - &Iovec ); - - if (rc < 0) { - break; - } - - if (tail) { - tail->Next = Iovec; - } else { - mdl = Iovec; - } - - tail = Iovec; - - total +=iov[i].iov_len; - } - - if (rc >= 0) { - *len = total; - } else { - if (mdl) { - ks_release_mdl(mdl, FALSE); - mdl = NULL; - } - } - - return mdl; -} - -/* - * ks_lock_kiovs - * Lock the kiov pages into MDL structure - * - * Arguments: - * kiov: the array of kiov pages - * niov: number of kiov to be locked - * len: the real length of the kiov arrary - * - * Return Value: - * PMDL: the Mdl of the locked buffers or NULL - * pointer in failure case - * - * Notes: - * N/A - */ -ksock_mdl_t * -ks_lock_kiovs( - IN lnet_kiov_t * kiov, - IN int nkiov, - IN int recving, - IN int * len ) -{ - int rc = 0; - int i = 0; - int total = 0; - ksock_mdl_t * mdl = NULL; - ksock_mdl_t * tail = NULL; - - LASSERT(kiov != NULL); - LASSERT(nkiov > 0); - LASSERT(len != NULL); - - for (i=0; i < nkiov; i++) { - - ksock_mdl_t * Iovec = NULL; - - - // - // Lock the kiov page into Iovec ¡ - // - - rc = ks_lock_buffer( - (PUCHAR)kiov[i].kiov_page->addr + - kiov[i].kiov_offset, - FALSE, - kiov[i].kiov_len, - recving ? 
IoWriteAccess : IoReadAccess, - &Iovec - ); - - if (rc < 0) { - break; - } - - // - // Attach the Iovec to the mdl chain - // - - if (tail) { - tail->Next = Iovec; - } else { - mdl = Iovec; - } - - tail = Iovec; - - total += kiov[i].kiov_len; - - } - - if (rc >= 0) { - *len = total; - } else { - if (mdl) { - ks_release_mdl(mdl, FALSE); - mdl = NULL; - } - } - - return mdl; -} - - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) - unsigned long vaddr = (unsigned long)iov->iov_base - int offset = vaddr & (PAGE_SIZE - 1); - int zcsize = MIN (iov->iov_len, PAGE_SIZE - offset); - struct page *page; -#endif - int nob; - int rc; - ksock_mdl_t * mdl; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) - if (zcsize >= ksocknal_data.ksnd_zc_min_frag && - (sock->sk->sk_route_caps & NETIF_F_SG) && - (sock->sk->sk_route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && - (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) { - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n", - (void *)vaddr, page, page_address(page), offset, zcsize); - - if (!list_empty (&conn->ksnc_tx_queue) || - zcsize < tx->tx_resid) - msgflg |= MSG_MORE; - - rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd); - } else -#endif - { - /* lock the whole tx iovs into a single mdl chain */ - mdl = ks_lock_iovs(tx->tx_iov, tx->tx_niov, FALSE, &nob); - - if (mdl) { - /* send the total mdl chain */ - rc = ks_send_mdl( conn->ksnc_sock, tx, mdl, nob, - (!list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ? 
- (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT); - } else { - rc = -ENOMEM; - } - } - - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; - lnet_kiov_t *kiov = tx->tx_kiov; - int rc; - int nob; - ksock_mdl_t * mdl; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - -#if SOCKNAL_ZC - if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag && - (sock->sk->sk_route_caps & NETIF_F_SG) && - (sock->sk->sk_route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->kiov_len); - - if (!list_empty(&conn->ksnc_tx_queue) || - fragsize < tx->tx_resid) - msgflg |= MSG_MORE; - - rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg, - &tx->tx_zccd); - } else -#endif - { - /* lock the whole tx kiovs into a single mdl chain */ - mdl = ks_lock_kiovs(tx->tx_kiov, tx->tx_nkiov, FALSE, &nob); - - if (mdl) { - /* send the total mdl chain */ - rc = ks_send_mdl( - conn->ksnc_sock, tx, mdl, nob, - (!list_empty(&conn->ksnc_tx_queue) || nob < tx->tx_resid) ? 
- (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT); - } else { - rc = -ENOMEM; - } - } - - return rc; -} - - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ - struct iovec *iov = conn->ksnc_rx_iov; - int rc; - int size; - ksock_mdl_t * mdl; - - /* lock the whole tx iovs into a single mdl chain */ - mdl = ks_lock_iovs(iov, conn->ksnc_rx_niov, TRUE, &size); - - if (!mdl) { - return (-ENOMEM); - } - - LASSERT (size <= conn->ksnc_rx_nob_wanted); - - /* try to request data for the whole mdl chain */ - rc = ks_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT); - - return rc; -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - int size; - int rc; - ksock_mdl_t * mdl; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only receive 1 frag at a time. */ - LASSERT (conn->ksnc_rx_nkiov > 0); - - /* lock the whole tx kiovs into a single mdl chain */ - mdl = ks_lock_kiovs(kiov, conn->ksnc_rx_nkiov, TRUE, &size); - - if (!mdl) { - rc = -ENOMEM; - return (rc); - } - - LASSERT (size <= conn->ksnc_rx_nob_wanted); - - /* try to request data for the whole mdl chain */ - rc = ks_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT); - - return rc; -} - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - __u32 option = 1; - int rc = 0; - - rc = ks_set_tcp_option( - conn->ksnc_sock, TCP_SOCKET_NODELAY, - &option, sizeof(option) ); - if (rc != 0) { - CERROR("Can't disable nagle: %d\n", rc); - } -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) -{ - ksock_tconn_t * tconn = conn->ksnc_sock; - int len; - int rc; - - ks_get_tconn (tconn); - - *txmem = *rxmem = 0; - - len = sizeof(*nagle); - - rc = ks_get_tcp_option( - tconn, TCP_SOCKET_NODELAY, - (__u32 *)nagle, &len); - - ks_put_tconn (tconn); - - printk("ksocknal_get_conn_tunables: nodelay = %d rc = %d\n", *nagle, rc); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - 
return (rc); -} - -int -ksocknal_lib_buffersize (int current_sz, int tunable_sz) -{ - /* ensure >= SOCKNAL_MIN_BUFFER */ - if (current_sz < SOCKNAL_MIN_BUFFER) - return MAX(SOCKNAL_MIN_BUFFER, tunable_sz); - - if (tunable_sz > SOCKNAL_MIN_BUFFER) - return tunable_sz; - - /* leave alone */ - return 0; -} - -int -ksocknal_lib_setup_sock (struct socket *sock) -{ - int rc; - - int keep_idle; - int keep_count; - int keep_intvl; - int keep_alive; - - __u32 option; - - /* set the window size */ - -#if 0 - tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size; - tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size; -#endif - - /* disable nagle */ - if (!ksocknal_tunables.ksnd_nagle) { - option = 1; - - rc = ks_set_tcp_option( - sock, TCP_SOCKET_NODELAY, - &option, sizeof (option)); - if (rc != 0) { - printk ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (__u32)(keep_alive ? 
1 : 0); - - rc = ks_set_tcp_option( - sock, TCP_SOCKET_KEEPALIVE, - &option, sizeof (option)); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - - return (0); -} - -void -ksocknal_lib_push_conn (ksock_conn_t *conn) -{ - ksock_tconn_t * tconn; - __u32 nagle; - __u32 val = 1; - int rc; - - tconn = conn->ksnc_sock; - - ks_get_tconn(tconn); - - spin_lock(&tconn->kstc_lock); - if (tconn->kstc_type == kstt_sender) { - nagle = tconn->sender.kstc_info.nagle; - tconn->sender.kstc_info.nagle = 0; - } else { - LASSERT(tconn->kstc_type == kstt_child); - nagle = tconn->child.kstc_info.nagle; - tconn->child.kstc_info.nagle = 0; - } - - spin_unlock(&tconn->kstc_lock); - - val = 1; - rc = ks_set_tcp_option( - tconn, - TCP_SOCKET_NODELAY, - &(val), - sizeof(__u32) - ); - - LASSERT (rc == 0); - spin_lock(&tconn->kstc_lock); - - if (tconn->kstc_type == kstt_sender) { - tconn->sender.kstc_info.nagle = nagle; - } else { - LASSERT(tconn->kstc_type == kstt_child); - tconn->child.kstc_info.nagle = nagle; - } - spin_unlock(&tconn->kstc_lock); - - ks_put_tconn(tconn); -} - -/* @mode: 0: receiving mode / 1: sending mode */ -void -ksocknal_sched_conn (ksock_conn_t *conn, int mode, ksock_tx_t *tx) -{ - int flags; - ksock_sched_t * sched; - ENTRY; - - /* interleave correctly with closing sockets... */ - read_lock (&ksocknal_data.ksnd_global_lock); - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - if (mode) { /* transmission can continue ... 
*/ - -#error "This is out of date - we should be calling ksocknal_write_callback()" - conn->ksnc_tx_ready = 1; - - if (tx) { - /* Incomplete send: place tx on HEAD of tx_queue */ - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } - - if ( !conn->ksnc_tx_scheduled && - !list_empty(&conn->ksnc_tx_queue)) { //packets to send - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_conn_refcount); - - cfs_waitq_signal (&sched->kss_waitq); - } - } else { /* receiving can continue ... */ - - conn->ksnc_rx_ready = 1; - - if ( !conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_conn_refcount); - - cfs_waitq_signal (&sched->kss_waitq); - } - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - read_unlock (&ksocknal_data.ksnd_global_lock); - - EXIT; -} - -void ksocknal_schedule_callback(struct socket*sock, int mode, void * tx, ulong_ptr bytes) -{ - ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn; - - if (mode) { - ksocknal_sched_conn(conn, mode, tx); - } else { - if ( CAN_BE_SCHED(bytes, (ulong_ptr)conn->ksnc_rx_nob_wanted )) { - ksocknal_sched_conn(conn, mode, tx); - } - } -} - -extern void -ksocknal_tx_launched (ksock_tx_t *tx); - -void -ksocknal_fini_sending(ksock_tcpx_fini_t *tcpx) -{ - ksocknal_tx_launched(tcpx->tx); - cfs_free(tcpx); -} - -void * -ksocknal_update_tx( - struct socket* tconn, - void * txp, - ulong_ptr rc - ) -{ - ksock_tx_t * tx = (ksock_tx_t *)txp; - - /* - * the transmission was done, we need update the tx - */ - - LASSERT(tx->tx_resid >= (int)rc); - tx->tx_resid -= (int)rc; - - /* - * just partial of tx is sent out, we need update - * the fields of tx and schedule later transmission. 
- */ - - if (tx->tx_resid) { - - if (tx->tx_niov > 0) { - - /* if there's iov, we need process iov first */ - while (rc > 0 ) { - if (rc < tx->tx_iov->iov_len) { - /* didn't send whole iov entry... */ - tx->tx_iov->iov_base = - (char *)(tx->tx_iov->iov_base) + rc; - tx->tx_iov->iov_len -= rc; - rc = 0; - } else { - /* the whole of iov was sent out */ - rc -= tx->tx_iov->iov_len; - tx->tx_iov++; - tx->tx_niov--; - } - } - - } else { - - /* now we need process the kiov queues ... */ - - while (rc > 0 ) { - - if (rc < tx->tx_kiov->kiov_len) { - /* didn't send whole kiov entry... */ - tx->tx_kiov->kiov_offset += rc; - tx->tx_kiov->kiov_len -= rc; - rc = 0; - } else { - /* whole kiov was sent out */ - rc -= tx->tx_kiov->kiov_len; - tx->tx_kiov++; - tx->tx_nkiov--; - } - } - } - - } else { - - ksock_tcpx_fini_t * tcpx = - cfs_alloc(sizeof(ksock_tcpx_fini_t), CFS_ALLOC_ZERO); - - ASSERT(tx->tx_resid == 0); - - if (!tcpx) { - - ksocknal_tx_launched (tx); - - } else { - - tcpx->tx = tx; - ExInitializeWorkItem( - &(tcpx->item), - ksocknal_fini_sending, - tcpx - ); - ExQueueWorkItem( - &(tcpx->item), - CriticalWorkQueue - ); - } - - tx = NULL; - } - - return (void *)tx; -} - -void -ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) -{ -} - -void -ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) -{ - sock->kstc_conn = conn; - sock->kstc_sched_cb = ksocknal_schedule_callback; - sock->kstc_update_tx = ksocknal_update_tx; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) -{ - sock->kstc_conn = NULL; - sock->kstc_sched_cb = NULL; - sock->kstc_update_tx = NULL; -} - diff --git a/lnet/klnds/socklnd/socklnd_lib-winnt.h b/lnet/klnds/socklnd/socklnd_lib-winnt.h deleted file mode 100755 index 492c9f595ee558d90b13f7d98a699c329111a867..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-winnt.h +++ /dev/null @@ -1,42 +0,0 @@ -#define DEBUG_PORTAL_ALLOC -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB 
-#endif - -#ifndef __WINNT_TDILND_LIB_H__ -#define __WINNT_TDILND_LIB_H__ - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#ifndef CONFIG_SMP - -static inline -int ksocknal_nsched(void) -{ - return 1; -} - -#else - -static inline int -ksocknal_nsched(void) -{ - return num_online_cpus(); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - return i; -} - -static inline int -ksocknal_irqsched2cpu(int i) -{ - return i; -} - -#endif - -#endif diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c deleted file mode 100644 index 08da2b375dad95ce3e20585d5f3e42b5d60bff40..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "socklnd.h" - -static int sock_timeout = 50; -CFS_MODULE_PARM(sock_timeout, "i", int, 0644, - "dead socket timeout (seconds)"); - -static int credits = 256; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int nconnds = 4; -CFS_MODULE_PARM(nconnds, "i", int, 0444, - "# connection daemons"); - -static int min_reconnectms = 1000; -CFS_MODULE_PARM(min_reconnectms, "i", int, 0644, - "min connection retry interval (mS)"); - -static int max_reconnectms = 60000; -CFS_MODULE_PARM(max_reconnectms, "i", int, 0644, - "max connection retry interval (mS)"); - -#if defined(__APPLE__) && !defined(__DARWIN8__) -# define DEFAULT_EAGER_ACK 1 -#else -# define DEFAULT_EAGER_ACK 0 -#endif -static int eager_ack = DEFAULT_EAGER_ACK; -CFS_MODULE_PARM(eager_ack, "i", int, 0644, - "send tcp ack packets eagerly"); - -static int typed_conns = 1; -CFS_MODULE_PARM(typed_conns, "i", int, 0444, - "use different sockets for bulk"); - -static int min_bulk = (1<<10); -CFS_MODULE_PARM(min_bulk, "i", int, 0644, - "smallest 'large' message"); - -#ifdef __APPLE__ -# ifdef __DARWIN8__ -# define DEFAULT_BUFFER_SIZE (224*1024) -# else -# define DEFAULT_BUFFER_SIZE (1152 * 1024) -# endif -#else -# define DEFAULT_BUFFER_SIZE 0 -#endif -static int tx_buffer_size = DEFAULT_BUFFER_SIZE; -CFS_MODULE_PARM(tx_buffer_size, "i", int, 0644, - "socket tx buffer size (0 for system default)"); - -static int rx_buffer_size = DEFAULT_BUFFER_SIZE; -CFS_MODULE_PARM(rx_buffer_size, "i", int, 0644, - "socket rx buffer size (0 for system default)"); - -static int nagle = 0; -CFS_MODULE_PARM(nagle, "i", int, 0644, - "enable NAGLE?"); - -static int keepalive_idle = 30; -CFS_MODULE_PARM(keepalive_idle, "i", int, 0644, - "# idle seconds before probe"); - -#ifdef HAVE_BGL_SUPPORT -#define DEFAULT_KEEPALIVE_COUNT 100 -#else -#define DEFAULT_KEEPALIVE_COUNT 5 -#endif -static 
int keepalive_count = DEFAULT_KEEPALIVE_COUNT; -CFS_MODULE_PARM(keepalive_count, "i", int, 0644, - "# missed probes == dead"); - -static int keepalive_intvl = 5; -CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644, - "seconds between probes"); - -static int enable_csum = 0; -CFS_MODULE_PARM(enable_csum, "i", int, 0644, - "enable check sum"); - -static int inject_csum_error = 0; -CFS_MODULE_PARM(inject_csum_error, "i", int, 0644, - "set non-zero to inject a checksum error"); -#ifdef CPU_AFFINITY -static int enable_irq_affinity = 0; -CFS_MODULE_PARM(enable_irq_affinity, "i", int, 0644, - "enable IRQ affinity"); -#endif - -static unsigned int zc_min_frag = (2<<10); -CFS_MODULE_PARM(zc_min_frag, "i", int, 0644, - "minimum fragment to zero copy"); - -#ifdef SOCKNAL_BACKOFF -static int backoff_init = 3; -CFS_MODULE_PARM(backoff_init, "i", int, 0644, - "seconds for initial tcp backoff"); - -static int backoff_max = 3; -CFS_MODULE_PARM(backoff_max, "i", int, 0644, - "seconds for maximum tcp backoff"); -#endif - -#if SOCKNAL_VERSION_DEBUG -static int protocol = 2; -CFS_MODULE_PARM(protocol, "i", int, 0644, - "protocol version"); -#endif - -ksock_tunables_t ksocknal_tunables = { - .ksnd_timeout = &sock_timeout, - .ksnd_credits = &credits, - .ksnd_peercredits = &peer_credits, - .ksnd_nconnds = &nconnds, - .ksnd_min_reconnectms = &min_reconnectms, - .ksnd_max_reconnectms = &max_reconnectms, - .ksnd_eager_ack = &eager_ack, - .ksnd_typed_conns = &typed_conns, - .ksnd_min_bulk = &min_bulk, - .ksnd_tx_buffer_size = &tx_buffer_size, - .ksnd_rx_buffer_size = &rx_buffer_size, - .ksnd_nagle = &nagle, - .ksnd_keepalive_idle = &keepalive_idle, - .ksnd_keepalive_count = &keepalive_count, - .ksnd_keepalive_intvl = &keepalive_intvl, - .ksnd_enable_csum = &enable_csum, - .ksnd_inject_csum_error = &inject_csum_error, - .ksnd_zc_min_frag = &zc_min_frag, -#ifdef CPU_AFFINITY - .ksnd_irq_affinity = &enable_irq_affinity, -#endif -#ifdef SOCKNAL_BACKOFF - .ksnd_backoff_init = &backoff_init, - 
.ksnd_backoff_max = &backoff_max, -#endif -#if SOCKNAL_VERSION_DEBUG - .ksnd_protocol = &protocol, -#endif -}; - diff --git a/lnet/klnds/viblnd/.cvsignore b/lnet/klnds/viblnd/.cvsignore deleted file mode 100644 index 2e9b6f47052e4a9724b08b6336229b01d72676a4..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend -wirecheck diff --git a/lnet/klnds/viblnd/Makefile.in b/lnet/klnds/viblnd/Makefile.in deleted file mode 100644 index 5b5c2db4ad030cd840b6cb78f301a9ce4a87d396..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kviblnd -kviblnd-objs := viblnd.o viblnd_cb.o viblnd_modparams.o - -EXTRA_POST_CFLAGS := @VIBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/viblnd/autoMakefile.am b/lnet/klnds/viblnd/autoMakefile.am deleted file mode 100644 index 19861a9fe6186728ccf1a6c821beb97ad32c08e1..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_VIBLND -modulenet_DATA = kviblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kviblnd-objs:%.o=%.c) viblnd.h viblnd_wire.h diff --git a/lnet/klnds/viblnd/viblnd.c b/lnet/klnds/viblnd/viblnd.c deleted file mode 100644 index 0ab15bff1f960c8ae3d022b97539385ef8a3ecaa..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd.c +++ /dev/null @@ -1,2017 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. 
- * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Frank Zago <fzago@systemfabricworks.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "viblnd.h" - -lnd_t the_kiblnd = { - .lnd_type = VIBLND, - .lnd_startup = kibnal_startup, - .lnd_shutdown = kibnal_shutdown, - .lnd_ctl = kibnal_ctl, - .lnd_send = kibnal_send, - .lnd_recv = kibnal_recv, - .lnd_eager_recv = kibnal_eager_recv, -}; - -kib_data_t kibnal_data; - -void vibnal_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux robert 2.6.11-1.27_FC3 #1 Tue May 17 20:27:37 EDT 2005 i686 athlon i386 G - * with gcc version 3.4.3 20050227 (Red Hat 3.4.3-22.fc3) */ - - - /* Constants... 
*/ - CLASSERT (IBNAL_MSG_MAGIC == 0x0be91b91); - CLASSERT (IBNAL_MSG_VERSION == 0x11); - CLASSERT (IBNAL_MSG_CONNREQ == 0xc0); - CLASSERT (IBNAL_MSG_CONNACK == 0xc1); - CLASSERT (IBNAL_MSG_NOOP == 0xd0); - CLASSERT (IBNAL_MSG_IMMEDIATE == 0xd1); - CLASSERT (IBNAL_MSG_PUT_REQ == 0xd2); - CLASSERT (IBNAL_MSG_PUT_NAK == 0xd3); - CLASSERT (IBNAL_MSG_PUT_ACK == 0xd4); - CLASSERT (IBNAL_MSG_PUT_DONE == 0xd5); - CLASSERT (IBNAL_MSG_GET_REQ == 0xd6); - CLASSERT (IBNAL_MSG_GET_DONE == 0xd7); - - /* Checks for struct kib_connparams_t */ - CLASSERT ((int)sizeof(kib_connparams_t) == 12); - CLASSERT ((int)offsetof(kib_connparams_t, ibcp_queue_depth) == 0); - CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_queue_depth) == 4); - CLASSERT ((int)offsetof(kib_connparams_t, ibcp_max_msg_size) == 4); - CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_max_msg_size) == 4); - CLASSERT ((int)offsetof(kib_connparams_t, ibcp_max_frags) == 8); - CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_max_frags) == 4); - - /* Checks for struct kib_immediate_msg_t */ - CLASSERT ((int)sizeof(kib_immediate_msg_t) == 72); - CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_hdr) == 0); - CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_hdr) == 72); - CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_payload[13]) == 85); - CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_payload[13]) == 1); - CLASSERT (IBNAL_USE_FMR == 1); - - /* Checks for struct kib_rdma_desc_t */ - CLASSERT ((int)sizeof(kib_rdma_desc_t) == 16); - CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_addr) == 0); - CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_addr) == 8); - CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_nob) == 8); - CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_nob) == 4); - CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_key) == 12); - CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_key) == 4); - - /* Checks for struct kib_putreq_msg_t */ - CLASSERT ((int)sizeof(kib_putreq_msg_t) == 80); - CLASSERT 
((int)offsetof(kib_putreq_msg_t, ibprm_hdr) == 0); - CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_hdr) == 72); - CLASSERT ((int)offsetof(kib_putreq_msg_t, ibprm_cookie) == 72); - CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_cookie) == 8); - - /* Checks for struct kib_putack_msg_t */ - CLASSERT ((int)sizeof(kib_putack_msg_t) == 32); - CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_src_cookie) == 0); - CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_src_cookie) == 8); - CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_dst_cookie) == 8); - CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_dst_cookie) == 8); - CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_rd) == 16); - CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_rd) == 16); - - /* Checks for struct kib_get_msg_t */ - CLASSERT ((int)sizeof(kib_get_msg_t) == 96); - CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_hdr) == 0); - CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_hdr) == 72); - CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_cookie) == 72); - CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_cookie) == 8); - CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_rd) == 80); - CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_rd) == 16); - - /* Checks for struct kib_completion_msg_t */ - CLASSERT ((int)sizeof(kib_completion_msg_t) == 12); - CLASSERT ((int)offsetof(kib_completion_msg_t, ibcm_cookie) == 0); - CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_cookie) == 8); - CLASSERT ((int)offsetof(kib_completion_msg_t, ibcm_status) == 8); - CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_status) == 4); - - /* Checks for struct kib_msg_t */ - CLASSERT ((int)sizeof(kib_msg_t) == 152); - CLASSERT ((int)offsetof(kib_msg_t, ibm_magic) == 0); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_magic) == 4); - CLASSERT ((int)offsetof(kib_msg_t, ibm_version) == 4); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_version) == 2); - CLASSERT ((int)offsetof(kib_msg_t, ibm_type) == 6); - CLASSERT 
((int)sizeof(((kib_msg_t *)0)->ibm_type) == 1); - CLASSERT ((int)offsetof(kib_msg_t, ibm_credits) == 7); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_credits) == 1); - CLASSERT ((int)offsetof(kib_msg_t, ibm_nob) == 8); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_nob) == 4); - CLASSERT ((int)offsetof(kib_msg_t, ibm_cksum) == 12); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_cksum) == 4); - CLASSERT ((int)offsetof(kib_msg_t, ibm_srcnid) == 16); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_srcnid) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_srcstamp) == 24); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_srcstamp) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_dstnid) == 32); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_dstnid) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_dststamp) == 40); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_dststamp) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_seq) == 48); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_seq) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.connparams) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.connparams) == 12); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.immediate) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.immediate) == 72); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putreq) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putreq) == 80); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putack) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putack) == 32); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.get) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.get) == 96); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.completion) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.completion) == 12); -} - -__u32 -kibnal_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -void -kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq) -{ - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBNAL_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid, - dstnid); - msg->ibm_srcstamp = kibnal_data.kib_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - msg->ibm_seq = seq; - - if (*kibnal_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob); - } -} - -int -kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - __u32 msg_version; - int flip; - int msg_nob; -#if !IBNAL_USE_FMR - int i; - int n; -#endif - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - /* Future protocol version compatibility support! - * If the viblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will negotiate a - * protocol version. If I find this, I avoid any console errors. If - * my is doing connection establishment, the reject will tell the peer - * which version I'm running. 
*/ - - if (msg->ibm_magic == IBNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) { - flip = 1; - } else { - if (msg->ibm_magic == LNET_PROTO_MAGIC || - msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) - return -EPROTO; - - /* Completely out to lunch */ - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version; - if (expected_version == 0) { - if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - msg_version != IBNAL_MSG_VERSION) - return -EPROTO; - } else if (msg_version != expected_version) { - CERROR("Bad version: %x(%x expected)\n", - msg_version, expected_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? 
__swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kibnal_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = msg_version; - CLASSERT (sizeof(msg->ibm_type) == 1); - CLASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - __swab64s(&msg->ibm_seq); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) { - CERROR("Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putreq))); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_ACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putack))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag); - } - - n = msg->ibm_u.putack.ibpam_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", - n, 
IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) { - for (i = 0; i < n; i++) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_lo); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_hi); - } - } -#endif - break; - - case IBNAL_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.get))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag); - } - - n = msg->ibm_u.get.ibgm_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", - n, IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) - for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_lo); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_hi); - } -#endif - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; 
- - case IBNAL_MSG_CONNREQ: - case IBNAL_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short connreq/ack: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - __swab32s(&msg->ibm_u.connparams.ibcp_max_frags); - } - break; - } - return 0; -} - -int -kibnal_start_listener (lnet_ni_t *ni) -{ - static cm_listen_data_t info; - - cm_return_t cmrc; - - LASSERT (kibnal_data.kib_listen_handle == NULL); - - kibnal_data.kib_listen_handle = - cm_create_cep(cm_cep_transp_rc); - if (kibnal_data.kib_listen_handle == NULL) { - CERROR ("Can't create listen CEP\n"); - return -ENOMEM; - } - - CDEBUG(D_NET, "Created CEP %p for listening\n", - kibnal_data.kib_listen_handle); - - memset(&info, 0, sizeof(info)); - info.listen_addr.end_pt.sid = - (__u64)(*kibnal_tunables.kib_service_number); - - cmrc = cm_listen(kibnal_data.kib_listen_handle, &info, - kibnal_listen_callback, NULL); - if (cmrc == cm_stat_success) - return 0; - - CERROR ("cm_listen error: %d\n", cmrc); - - cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle); - LASSERT (cmrc == cm_stat_success); - - kibnal_data.kib_listen_handle = NULL; - return -EINVAL; -} - -void -kibnal_stop_listener(lnet_ni_t *ni) -{ - cm_return_t cmrc; - - LASSERT (kibnal_data.kib_listen_handle != NULL); - - cmrc = cm_cancel(kibnal_data.kib_listen_handle); - if (cmrc != cm_stat_success) - CERROR ("Error %d stopping listener\n", cmrc); - - cfs_pause(cfs_time_seconds(1)/10); /* ensure no more callbacks */ - - cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle); - if (cmrc != vv_return_ok) - CERROR ("Error %d destroying CEP\n", cmrc); - - kibnal_data.kib_listen_handle = NULL; -} - -int -kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) -{ - kib_peer_t *peer; - unsigned long flags; - int rc; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, 
sizeof (*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_nid = nid; - atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD (&peer->ibp_conns); - INIT_LIST_HEAD (&peer->ibp_tx_queue); - - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - peer->ibp_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (atomic_read(&kibnal_data.kib_npeers) >= - *kibnal_tunables.kib_concurrent_peers) { - rc = -EOVERFLOW; /* !! but at least it distinguishes */ - } else if (kibnal_data.kib_listen_handle == NULL) { - rc = -ESHUTDOWN; /* shutdown has started */ - } else { - rc = 0; - /* npeers only grows with the global lock held */ - atomic_inc(&kibnal_data.kib_npeers); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Can't create peer: %s\n", - (rc == -ESHUTDOWN) ? "shutting down" : - "too many peers"); - LIBCFS_FREE(peer, sizeof(*peer)); - } else { - *peerp = peer; - } - - return rc; -} - -void -kibnal_destroy_peer (kib_peer_t *peer) -{ - LASSERT (atomic_read (&peer->ibp_refcount) == 0); - LASSERT (peer->ibp_persistence == 0); - LASSERT (!kibnal_peer_active(peer)); - LASSERT (peer->ibp_connecting == 0); - LASSERT (peer->ibp_accepting == 0); - LASSERT (list_empty (&peer->ibp_conns)); - LASSERT (list_empty (&peer->ibp_tx_queue)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. 
*/ - atomic_dec(&kibnal_data.kib_npeers); -} - -kib_peer_t * -kibnal_find_peer_locked (lnet_nid_t nid) -{ - /* the caller is responsible for accounting the additional reference - * that this creates */ - struct list_head *peer_list = kibnal_nid2peerlist (nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_persistence != 0 || /* persistent peer */ - peer->ibp_connecting != 0 || /* creating conns */ - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); /* active conn */ - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), - atomic_read (&peer->ibp_refcount)); - return (peer); - } - return (NULL); -} - -void -kibnal_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (peer->ibp_persistence == 0); - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kibnal_peer_active(peer)); - list_del_init (&peer->ibp_list); - /* lose peerlist's ref */ - kibnal_peer_decref(peer); -} - -int -kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, - int *persistencep) -{ - kib_peer_t *peer; - struct list_head *ptmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *ipp = peer->ibp_ip; - *persistencep = peer->ibp_persistence; - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (0); - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (-ENOENT); -} - -int -kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip) -{ - kib_peer_t *peer; - kib_peer_t 
*peer2; - unsigned long flags; - int rc; - - CDEBUG(D_NET, "%s at %u.%u.%u.%u\n", - libcfs_nid2str(nid), HIPQUAD(ip)); - - if (nid == LNET_NID_ANY) - return (-EINVAL); - - rc = kibnal_create_peer(&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - /* I'm always called with a reference on kibnal_data.kib_ni - * so shutdown can't have started */ - LASSERT (kibnal_data.kib_listen_handle != NULL); - - peer2 = kibnal_find_peer_locked (nid); - if (peer2 != NULL) { - kibnal_peer_decref (peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist (nid)); - } - - peer->ibp_ip = ip; - peer->ibp_persistence++; - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (0); -} - -void -kibnal_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - peer->ibp_persistence = 0; - - if (list_empty(&peer->ibp_conns)) { - kibnal_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kibnal_close_conn_locked (conn, 0); - } - /* NB peer is no longer persistent; closing its last conn - * unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. 
*/ -} - -int -kibnal_del_peer (lnet_nid_t nid) -{ - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kibnal_del_peer_locked (peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_txlist_done(&zombies, -EIO); - - return (rc); -} - -kib_conn_t * -kibnal_get_conn_by_idx (int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence > 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, kib_conn_t, ibc_list); - kibnal_conn_addref(conn); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (conn); - } - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return 
(NULL); -} - -void -kibnal_debug_rx (kib_rx_t *rx) -{ - CDEBUG(D_CONSOLE, " %p nob %d msg_type %x " - "cred %d seq "LPD64"\n", - rx, rx->rx_nob, rx->rx_msg->ibm_type, - rx->rx_msg->ibm_credits, rx->rx_msg->ibm_seq); -} - -void -kibnal_debug_tx (kib_tx_t *tx) -{ - CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lx " - "cookie "LPX64" msg %s%s type %x cred %d seq "LPD64"\n", - tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting, - tx->tx_status, tx->tx_deadline, tx->tx_cookie, - tx->tx_lntmsg[0] == NULL ? "-" : "!", - tx->tx_lntmsg[1] == NULL ? "-" : "!", - tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits, - tx->tx_msg->ibm_seq); -} - -void -kibnal_debug_conn (kib_conn_t *conn) -{ - struct list_head *tmp; - int i; - - spin_lock(&conn->ibc_lock); - - CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n", - atomic_read(&conn->ibc_refcount), conn, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - CDEBUG(D_CONSOLE, " txseq "LPD64" rxseq "LPD64" state %d \n", - conn->ibc_txseq, conn->ibc_rxseq, conn->ibc_state); - CDEBUG(D_CONSOLE, " nposted %d cred %d o_cred %d r_cred %d\n", - conn->ibc_nsends_posted, conn->ibc_credits, - conn->ibc_outstanding_credits, conn->ibc_reserved_credits); - CDEBUG(D_CONSOLE, " disc %d comms_err %d\n", - conn->ibc_disconnect, conn->ibc_comms_error); - - CDEBUG(D_CONSOLE, " early_rxs:\n"); - list_for_each(tmp, &conn->ibc_early_rxs) - kibnal_debug_rx(list_entry(tmp, kib_rx_t, rx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_nocred:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_nocred) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_rsrvd) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue:\n"); - list_for_each(tmp, &conn->ibc_tx_queue) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " active_txs:\n"); - list_for_each(tmp, &conn->ibc_active_txs) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - 
CDEBUG(D_CONSOLE, " rxs:\n"); - for (i = 0; i < IBNAL_RX_MSGS; i++) - kibnal_debug_rx(&conn->ibc_rxs[i]); - - spin_unlock(&conn->ibc_lock); -} - -int -kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) -{ - static vv_qp_attr_t attr; - - kib_connvars_t *cv = conn->ibc_connvars; - vv_return_t vvrc; - - /* Only called by connd => static OK */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - memset(&attr, 0, sizeof(attr)); - - switch (new_state) { - default: - LBUG(); - - case vv_qp_state_init: { - struct vv_qp_modify_init_st *init = &attr.modify.params.init; - - init->p_key_indx = cv->cv_pkey_index; - init->phy_port_num = cv->cv_port; - init->q_key = IBNAL_QKEY; /* XXX but VV_QP_AT_Q_KEY not set! */ - init->access_control = vv_acc_r_mem_read | - vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */ - - attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX | - VV_QP_AT_PHY_PORT_NUM | - VV_QP_AT_ACCESS_CON_F; - break; - } - case vv_qp_state_rtr: { - struct vv_qp_modify_rtr_st *rtr = &attr.modify.params.rtr; - vv_add_vec_t *av = &rtr->remote_add_vec; - - av->dlid = cv->cv_path.dlid; - av->grh_flag = (!IBNAL_LOCAL_SUB); - av->max_static_rate = IBNAL_R_2_STATIC_RATE(cv->cv_path.rate); - av->service_level = cv->cv_path.sl; - av->source_path_bit = IBNAL_SOURCE_PATH_BIT; - av->pmtu = cv->cv_path.mtu; - av->rnr_retry_count = cv->cv_rnr_count; - av->global_dest.traffic_class = cv->cv_path.traffic_class; - av->global_dest.hope_limit = cv->cv_path.hop_limut; - av->global_dest.flow_lable = cv->cv_path.flow_label; - av->global_dest.s_gid_index = cv->cv_sgid_index; - // XXX other av fields zero? 
- - rtr->destanation_qp = cv->cv_remote_qpn; - rtr->receive_psn = cv->cv_rxpsn; - rtr->responder_rdma_r_atom_num = IBNAL_OUS_DST_RD; - rtr->opt_min_rnr_nak_timer = *kibnal_tunables.kib_rnr_nak_timer; - - - // XXX sdp sets VV_QP_AT_OP_F but no actual optional options - attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC | - VV_QP_AT_DEST_QP | - VV_QP_AT_R_PSN | - VV_QP_AT_MIN_RNR_NAK_T | - VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM | - VV_QP_AT_OP_F; - break; - } - case vv_qp_state_rts: { - struct vv_qp_modify_rts_st *rts = &attr.modify.params.rts; - - rts->send_psn = cv->cv_txpsn; - rts->local_ack_timeout = *kibnal_tunables.kib_local_ack_timeout; - rts->retry_num = *kibnal_tunables.kib_retry_cnt; - rts->rnr_num = *kibnal_tunables.kib_rnr_cnt; - rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD; - - attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN | - VV_QP_AT_L_ACK_T | - VV_QP_AT_RETRY_NUM | - VV_QP_AT_RNR_NUM | - VV_QP_AT_DEST_RDMA_ATOM_OUT_NUM; - break; - } - case vv_qp_state_error: - case vv_qp_state_reset: - attr.modify.vv_qp_attr_mask = 0; - break; - } - - attr.modify.qp_modify_into_state = new_state; - attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE; - - vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL); - if (vvrc != vv_return_ok) { - CERROR("Can't modify qp -> %s state to %d: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - new_state, vvrc); - return -EIO; - } - - return 0; -} - -kib_conn_t * -kibnal_create_conn (cm_cep_handle_t cep) -{ - kib_conn_t *conn; - int i; - int page_offset; - int ipage; - vv_return_t vvrc; - int rc; - - static vv_qp_attr_t reqattr; - static vv_qp_attr_t rspattr; - - /* Only the connd creates conns => single threaded */ - LASSERT(!in_interrupt()); - LASSERT(current == kibnal_data.kib_connd); - - LIBCFS_ALLOC(conn, sizeof (*conn)); - if (conn == NULL) { - CERROR ("Can't allocate connection\n"); - return (NULL); - } - - /* zero flags, NULL pointers etc... 
*/ - memset (conn, 0, sizeof (*conn)); - - conn->ibc_version = IBNAL_MSG_VERSION; /* Use latest version at first */ - - INIT_LIST_HEAD (&conn->ibc_early_rxs); - INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD (&conn->ibc_tx_queue); - INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD (&conn->ibc_active_txs); - spin_lock_init (&conn->ibc_lock); - - atomic_inc (&kibnal_data.kib_nconns); - /* well not really, but I call destroy() on failure, which decrements */ - - conn->ibc_cep = cep; - - LIBCFS_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - if (conn->ibc_connvars == NULL) { - CERROR("Can't allocate in-progress connection state\n"); - goto failed; - } - memset (conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars)); - /* Random seed for QP sequence number */ - get_random_bytes(&conn->ibc_connvars->cv_rxpsn, - sizeof(conn->ibc_connvars->cv_rxpsn)); - - LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t)); - if (conn->ibc_rxs == NULL) { - CERROR("Cannot allocate RX buffers\n"); - goto failed; - } - memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES, 1); - if (rc != 0) - goto failed; - - for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - vv_mem_reg_h_t mem_h; - vv_r_key_t r_key; - - rx->rx_conn = conn; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, - rx->rx_msg, - IBNAL_MSG_SIZE, - &mem_h, - &rx->rx_lkey, - &r_key); - LASSERT (vvrc == vv_return_ok); - - CDEBUG(D_NET, "Rx[%d] %p->%p[%x]\n", i, rx, - rx->rx_msg, rx->rx_lkey); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_RX_MSG_PAGES); - } - } - - memset(&reqattr, 0, sizeof(reqattr)); - - 
reqattr.create.qp_type = vv_qp_type_r_conn; - reqattr.create.cq_send_h = kibnal_data.kib_cq; - reqattr.create.cq_receive_h = kibnal_data.kib_cq; - reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) * - (*kibnal_tunables.kib_concurrent_sends); - reqattr.create.receive_max_outstand_wr = IBNAL_RX_MSGS; - reqattr.create.max_scatgat_per_send_wr = 1; - reqattr.create.max_scatgat_per_receive_wr = 1; - reqattr.create.signaling_type = vv_selectable_signaling; - reqattr.create.pd_h = kibnal_data.kib_pd; - reqattr.create.recv_solicited_events = vv_selectable_signaling; // vv_signal_all; - - vvrc = vv_qp_create(kibnal_data.kib_hca, &reqattr, NULL, - &conn->ibc_qp, &rspattr); - if (vvrc != vv_return_ok) { - CERROR ("Failed to create queue pair: %d\n", vvrc); - goto failed; - } - - /* Mark QP created */ - conn->ibc_state = IBNAL_CONN_INIT_QP; - conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num; - - if (rspattr.create_return.receive_max_outstand_wr < - IBNAL_RX_MSGS || - rspattr.create_return.send_max_outstand_wr < - (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends)) { - CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n", - IBNAL_RX_MSGS, - (1 + IBNAL_MAX_RDMA_FRAGS) * - (*kibnal_tunables.kib_concurrent_sends), - rspattr.create_return.receive_max_outstand_wr, - rspattr.create_return.send_max_outstand_wr); - goto failed; - } - - /* Mark init complete */ - conn->ibc_state = IBNAL_CONN_INIT; - - /* 1 ref for caller */ - atomic_set (&conn->ibc_refcount, 1); - return (conn); - - failed: - kibnal_destroy_conn (conn); - return (NULL); -} - -void -kibnal_destroy_conn (kib_conn_t *conn) -{ - vv_return_t vvrc; - - /* Only the connd does this (i.e. 
single threaded) */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - CDEBUG (D_NET, "connection %p\n", conn); - - LASSERT (atomic_read (&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_early_rxs)); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT (list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - - switch (conn->ibc_state) { - default: - /* conn must be completely disengaged from the network */ - LBUG(); - - case IBNAL_CONN_DISCONNECTED: - /* connvars should have been freed already */ - LASSERT (conn->ibc_connvars == NULL); - /* fall through */ - - case IBNAL_CONN_INIT: - vvrc = cm_destroy_cep(conn->ibc_cep); - LASSERT (vvrc == vv_return_ok); - /* fall through */ - - case IBNAL_CONN_INIT_QP: - kibnal_set_qp_state(conn, vv_qp_state_reset); - vvrc = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp); - if (vvrc != vv_return_ok) - CERROR("Can't destroy QP: %d\n", vvrc); - /* fall through */ - - case IBNAL_CONN_INIT_NOTHING: - break; - } - - if (conn->ibc_rx_pages != NULL) - kibnal_free_pages(conn->ibc_rx_pages); - - if (conn->ibc_rxs != NULL) - LIBCFS_FREE(conn->ibc_rxs, - IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - if (conn->ibc_connvars != NULL) - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - - if (conn->ibc_peer != NULL) - kibnal_peer_decref(conn->ibc_peer); - - LIBCFS_FREE(conn, sizeof (*conn)); - - atomic_dec(&kibnal_data.kib_nconns); -} - -int -kibnal_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - count++; - kibnal_close_conn_locked (conn, why); - } - - return (count); -} - -int -kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; 
- struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kibnal_close_conn_locked (conn, -ESTALE); - } - - return (count); -} - -int -kibnal_close_matching_conns (lnet_nid_t nid) -{ - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - unsigned long flags; - int count = 0; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kibnal_close_peer_conns_locked (peer, 0); - } - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return (0); - - return (count == 0 ? 
-ENOENT : 0); -} - -int -kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - LASSERT (ni == kibnal_data.kib_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - __u32 ip = 0; - int share_count = 0; - - rc = kibnal_get_peer_info(data->ioc_count, - &nid, &ip, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = *kibnal_tunables.kib_service_number; /* port */ - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kibnal_add_persistent_peer (data->ioc_nid, - data->ioc_u32[0]); /* IP */ - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kibnal_del_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - // kibnal_debug_conn(conn); - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kibnal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kibnal_close_matching_conns (data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - if (ni->ni_nid == data->ioc_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - return rc; -} - -void -kibnal_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write) -{ - kib_pages_t *p; - int i; - - LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { - CERROR ("Can't allocate buffer %d\n", npages); - return (-ENOMEM); - } - - memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; 
i++) { - p->ibp_pages[i] = alloc_page (GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR ("Can't allocate page %d of %d\n", i, npages); - kibnal_free_pages(p); - return (-ENOMEM); - } - } - - *pp = p; - return (0); -} - -int -kibnal_alloc_tx_descs (void) -{ - int i; - - LIBCFS_ALLOC (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - if (kibnal_data.kib_tx_descs == NULL) - return -ENOMEM; - - memset(kibnal_data.kib_tx_descs, 0, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); - if (tx->tx_pages == NULL) - return -ENOMEM; -#else - LIBCFS_ALLOC(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - if (tx->tx_wrq == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - if (tx->tx_gl == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); - if (tx->tx_rd == NULL) - return -ENOMEM; -#endif - } - - return 0; -} - -void -kibnal_free_tx_descs (void) -{ - int i; - - if (kibnal_data.kib_tx_descs == NULL) - return; - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - if (tx->tx_pages != NULL) - LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); -#else - if (tx->tx_wrq != NULL) - LIBCFS_FREE(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - - if (tx->tx_gl != NULL) - LIBCFS_FREE(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - - if (tx->tx_rd != NULL) - LIBCFS_FREE(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); -#endif - } - - LIBCFS_FREE(kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); -} - -#if IBNAL_USE_FMR -void -kibnal_free_fmrs (int n) -{ - int i; - vv_return_t vvrc; - kib_tx_t *tx; - - for (i = 
0; i < n; i++) { - tx = &kibnal_data.kib_tx_descs[i]; - - vvrc = vv_free_fmr(kibnal_data.kib_hca, - tx->tx_md.md_fmrhandle); - if (vvrc != vv_return_ok) - CWARN("vv_free_fmr[%d]: %d\n", i, vvrc); - } -} -#endif - -int -kibnal_setup_tx_descs (void) -{ - int ipage = 0; - int page_offset = 0; - struct page *page; - kib_tx_t *tx; - vv_mem_reg_h_t mem_h; - vv_r_key_t rkey; - vv_return_t vvrc; - int i; - int rc; -#if IBNAL_USE_FMR - vv_fmr_t fmr_props; -#endif - - /* pre-mapped messages are not bigger than 1 page */ - CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - - rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, - IBNAL_TX_MSG_PAGES(), 0); - if (rc != 0) - return (rc); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - page = kibnal_data.kib_tx_pages->ibp_pages[ipage]; - tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - memset(&fmr_props, 0, sizeof(fmr_props)); - fmr_props.pd_hndl = kibnal_data.kib_pd; - fmr_props.acl = (vv_acc_r_mem_write | - vv_acc_l_mem_write); - fmr_props.max_pages = LNET_MAX_IOV; - fmr_props.log2_page_sz = PAGE_SHIFT; - fmr_props.max_outstanding_maps = *kibnal_tunables.kib_fmr_remaps; - - vvrc = vv_alloc_fmr(kibnal_data.kib_hca, - &fmr_props, - &tx->tx_md.md_fmrhandle); - if (vvrc != vv_return_ok) { - CERROR("Can't allocate fmr %d: %d\n", i, vvrc); - kibnal_free_fmrs(i); - kibnal_free_pages (kibnal_data.kib_tx_pages); - return -ENOMEM; - } - - tx->tx_md.md_fmrcount = *kibnal_tunables.kib_fmr_remaps; - tx->tx_md.md_active = 0; -#endif - tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, - tx->tx_msg, - IBNAL_MSG_SIZE, - &mem_h, - &tx->tx_lkey, - &rkey); - LASSERT (vvrc == vv_return_ok); - - CDEBUG(D_NET, "Tx[%d] %p->%p[%x]\n", i, tx, - tx->tx_msg, tx->tx_lkey); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - page_offset += IBNAL_MSG_SIZE; - LASSERT 
(page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_TX_MSG_PAGES()); - } - } - - return (0); -} - -void -kibnal_shutdown (lnet_ni_t *ni) -{ - int i; - vv_return_t vvrc; - - LASSERT (ni == kibnal_data.kib_ni); - LASSERT (ni->ni_data == &kibnal_data); - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - switch (kibnal_data.kib_init) { - - case IBNAL_INIT_ALL: - /* stop accepting connections and prevent new peers */ - kibnal_stop_listener(ni); - - /* nuke all existing peers */ - kibnal_del_peer(LNET_NID_ANY); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&kibnal_data.kib_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */ - "waiting for %d peers to disconnect\n", - atomic_read(&kibnal_data.kib_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_CQ: - vvrc = vv_cq_destroy(kibnal_data.kib_hca, kibnal_data.kib_cq); - if (vvrc != vv_return_ok) - CERROR ("Destroy CQ error: %d\n", vvrc); - /* fall through */ - - case IBNAL_INIT_TXD: - kibnal_free_pages (kibnal_data.kib_tx_pages); -#if IBNAL_USE_FMR - kibnal_free_fmrs(IBNAL_TX_MSGS()); -#endif - /* fall through */ - - case IBNAL_INIT_PD: -#if 0 - /* Only deallocate a PD if we actually allocated one */ - vvrc = vv_pd_deallocate(kibnal_data.kib_hca, - kibnal_data.kib_pd); - if (vvrc != vv_return_ok) - CERROR ("Destroy PD error: %d\n", vvrc); -#endif - /* fall through */ - - case IBNAL_INIT_ASYNC: - vvrc = vv_dell_async_event_cb (kibnal_data.kib_hca, - kibnal_async_callback); - if (vvrc != vv_return_ok) - CERROR("vv_dell_async_event_cb error: %d\n", vvrc); - - /* fall through */ - - case IBNAL_INIT_HCA: - vvrc = vv_hca_close(kibnal_data.kib_hca); - if (vvrc != vv_return_ok) - CERROR ("Close HCA error: %d\n", vvrc); - /* fall through */ - - case IBNAL_INIT_DATA: - LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0); - LASSERT 
(kibnal_data.kib_peers != NULL); - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - LASSERT (list_empty (&kibnal_data.kib_peers[i])); - } - LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0); - LASSERT (list_empty (&kibnal_data.kib_connd_zombies)); - LASSERT (list_empty (&kibnal_data.kib_connd_conns)); - LASSERT (list_empty (&kibnal_data.kib_connd_pcreqs)); - LASSERT (list_empty (&kibnal_data.kib_connd_peers)); - - /* flag threads to terminate; wake and wait for them to die */ - kibnal_data.kib_shutdown = 1; - wake_up_all (&kibnal_data.kib_sched_waitq); - wake_up_all (&kibnal_data.kib_connd_waitq); - - i = 2; - while (atomic_read (&kibnal_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d threads to terminate\n", - atomic_read (&kibnal_data.kib_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_NOTHING: - break; - } - - kibnal_free_tx_descs(); - - if (kibnal_data.kib_peers != NULL) - LIBCFS_FREE (kibnal_data.kib_peers, - sizeof (struct list_head) * - kibnal_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - kibnal_data.kib_init = IBNAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kibnal_startup (lnet_ni_t *ni) -{ - char scratch[32]; - char ipif_name[32]; - char *hca_name; - __u32 ip; - __u32 netmask; - int up; - int nob; - int devno; - struct timeval tv; - int rc; - int i; - vv_request_event_record_t req_er; - vv_return_t vvrc; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - /* Only 1 instance supported */ - if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kibnal_tunables.kib_credits, - *kibnal_tunables.kib_ntx); - return -EINVAL; - } - - ni->ni_maxtxcredits = *kibnal_tunables.kib_credits; - 
ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits; - - CLASSERT (LNET_MAX_INTERFACES > 1); - - if (ni->ni_interfaces[0] != NULL) { - /* Use the HCA specified in 'networks=' */ - - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - return -EPERM; - } - - /* Parse <hca base name><number> */ - hca_name = ni->ni_interfaces[0]; - nob = strlen(*kibnal_tunables.kib_hca_basename); - - if (strncmp(hca_name, *kibnal_tunables.kib_hca_basename, nob) || - sscanf(hca_name + nob, "%d%n", &devno, &nob) < 1) { - CERROR("Unrecognised HCA %s\n", hca_name); - return -EINVAL; - } - - } else { - /* Use <hca base name>0 */ - devno = 0; - - hca_name = scratch; - snprintf(hca_name, sizeof(scratch), "%s%d", - *kibnal_tunables.kib_hca_basename, devno); - if (strlen(hca_name) == sizeof(scratch) - 1) { - CERROR("HCA name %s truncated\n", hca_name); - return -EINVAL; - } - } - - /* Find IP address from <ipif base name><hca number> */ - snprintf(ipif_name, sizeof(ipif_name), "%s%d", - *kibnal_tunables.kib_ipif_basename, devno); - if (strlen(ipif_name) == sizeof(ipif_name) - 1) { - CERROR("IPoIB interface name %s truncated\n", ipif_name); - return -EINVAL; - } - - rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc); - return -ENETDOWN; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name); - return -ENETDOWN; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip); - - PORTAL_MODULE_USE; - memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */ - - kibnal_data.kib_ni = ni; - ni->ni_data = &kibnal_data; - - do_gettimeofday(&tv); - kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - rwlock_init(&kibnal_data.kib_global_lock); - - kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (kibnal_data.kib_peers, - sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); - if 
(kibnal_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kibnal_data.kib_peers[i]); - - spin_lock_init (&kibnal_data.kib_connd_lock); - INIT_LIST_HEAD (&kibnal_data.kib_connd_peers); - INIT_LIST_HEAD (&kibnal_data.kib_connd_pcreqs); - INIT_LIST_HEAD (&kibnal_data.kib_connd_conns); - INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies); - init_waitqueue_head (&kibnal_data.kib_connd_waitq); - - spin_lock_init (&kibnal_data.kib_sched_lock); - init_waitqueue_head (&kibnal_data.kib_sched_waitq); - - spin_lock_init (&kibnal_data.kib_tx_lock); - INIT_LIST_HEAD (&kibnal_data.kib_idle_txs); - - rc = kibnal_alloc_tx_descs(); - if (rc != 0) { - CERROR("Can't allocate tx descs\n"); - goto failed; - } - - /* lists/ptrs/locks initialised */ - kibnal_data.kib_init = IBNAL_INIT_DATA; - /*****************************************************/ - - for (i = 0; i < IBNAL_N_SCHED; i++) { - rc = kibnal_thread_start (kibnal_scheduler, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn vibnal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kibnal_thread_start (kibnal_connd, NULL); - if (rc != 0) { - CERROR ("Can't spawn vibnal connd: %d\n", rc); - goto failed; - } - - vvrc = vv_hca_open(hca_name, NULL, &kibnal_data.kib_hca); - if (vvrc != vv_return_ok) { - CERROR ("Can't open HCA %s: %d\n", hca_name, vvrc); - goto failed; - } - - /* Channel Adapter opened */ - kibnal_data.kib_init = IBNAL_INIT_HCA; - - /* register to get HCA's asynchronous events. 
*/ - req_er.req_event_type = VV_ASYNC_EVENT_ALL_MASK; - vvrc = vv_set_async_event_cb (kibnal_data.kib_hca, req_er, - kibnal_async_callback); - if (vvrc != vv_return_ok) { - CERROR ("Can't set HCA %s callback: %d\n", hca_name, vvrc); - goto failed; - } - - kibnal_data.kib_init = IBNAL_INIT_ASYNC; - - /*****************************************************/ - - vvrc = vv_hca_query(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs); - if (vvrc != vv_return_ok) { - CERROR ("Can't size port attrs for %s: %d\n", hca_name, vvrc); - goto failed; - } - - kibnal_data.kib_port = -1; - - for (i = 0; i<kibnal_data.kib_hca_attrs.port_num; i++) { - - int port_num = i+1; - u_int32_t tbl_count; - vv_port_attrib_t *pattr = &kibnal_data.kib_port_attr; - - vvrc = vv_port_query(kibnal_data.kib_hca, port_num, pattr); - if (vvrc != vv_return_ok) { - CERROR("vv_port_query failed for %s port %d: %d\n", - hca_name, port_num, vvrc); - continue; - } - - switch (pattr->port_state) { - case vv_state_linkDoun: - CDEBUG(D_NET, "port[%d] Down\n", port_num); - continue; - case vv_state_linkInit: - CDEBUG(D_NET, "port[%d] Init\n", port_num); - continue; - case vv_state_linkArm: - CDEBUG(D_NET, "port[%d] Armed\n", port_num); - continue; - case vv_state_linkActive: - CDEBUG(D_NET, "port[%d] Active\n", port_num); - - /* Found a suitable port. Get its GUID and PKEY. */ - tbl_count = 1; - vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca, - port_num, &tbl_count, - &kibnal_data.kib_port_gid); - if (vvrc != vv_return_ok) { - CERROR("vv_get_port_gid_tbl failed " - "for %s port %d: %d\n", - hca_name, port_num, vvrc); - continue; - } - - tbl_count = 1; - vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca, - port_num, &tbl_count, - &kibnal_data.kib_port_pkey); - if (vvrc != vv_return_ok) { - CERROR("vv_get_port_partition_tbl failed " - "for %s port %d: %d\n", - hca_name, port_num, vvrc); - continue; - } - - kibnal_data.kib_port = port_num; - - break; - case vv_state_linkActDefer: /* TODO: correct? 
*/ - case vv_state_linkNoChange: - CERROR("Unexpected %s port[%d] state %d\n", - hca_name, i, pattr->port_state); - continue; - } - break; - } - - if (kibnal_data.kib_port == -1) { - CERROR ("Can't find an active port on %s\n", hca_name); - goto failed; - } - - CDEBUG(D_NET, "Using %s port %d - GID="LPX64":"LPX64"\n", - hca_name, kibnal_data.kib_port, - kibnal_data.kib_port_gid.scope.g.subnet, - kibnal_data.kib_port_gid.scope.g.eui64); - - /*****************************************************/ - -#if 1 - /* We use a pre-allocated PD */ - vvrc = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd); -#else - vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd); -#endif - if (vvrc != vv_return_ok) { - CERROR ("Can't init PD: %d\n", vvrc); - goto failed; - } - - /* flag PD initialised */ - kibnal_data.kib_init = IBNAL_INIT_PD; - /*****************************************************/ - - rc = kibnal_setup_tx_descs(); - if (rc != 0) { - CERROR ("Can't register tx descs: %d\n", rc); - goto failed; - } - - /* flag TX descs initialised */ - kibnal_data.kib_init = IBNAL_INIT_TXD; - /*****************************************************/ - - { - __u32 nentries; - - vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(), - kibnal_cq_callback, - NULL, /* context */ - &kibnal_data.kib_cq, &nentries); - if (vvrc != 0) { - CERROR ("Can't create RX CQ: %d\n", vvrc); - goto failed; - } - - /* flag CQ initialised */ - kibnal_data.kib_init = IBNAL_INIT_CQ; - - if (nentries < IBNAL_CQ_ENTRIES()) { - CERROR ("CQ only has %d entries, need %d\n", - nentries, IBNAL_CQ_ENTRIES()); - goto failed; - } - - vvrc = vv_request_completion_notification(kibnal_data.kib_hca, - kibnal_data.kib_cq, - vv_next_solicit_unsolicit_event); - if (vvrc != 0) { - CERROR ("Failed to re-arm completion queue: %d\n", rc); - goto failed; - } - } - - rc = kibnal_start_listener(ni); - if (rc != 0) { - CERROR("Can't start listener: %d\n", rc); - goto failed; - } - - /* flag everything initialised 
*/ - kibnal_data.kib_init = IBNAL_INIT_ALL; - /*****************************************************/ - - return (0); - - failed: - CDEBUG(D_NET, "kibnal_startup failed\n"); - kibnal_shutdown (ni); - return (-ENETDOWN); -} - -void __exit -kibnal_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kibnal_tunables_fini(); -} - -int __init -kibnal_module_init (void) -{ - int rc; - - vibnal_assert_wire_constants(); - - CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) - <= cm_REQ_priv_data_len); - CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) - <= cm_REP_priv_data_len); - CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE); -#if !IBNAL_USE_FMR - CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS]) - <= IBNAL_MSG_SIZE); - CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS]) - <= IBNAL_MSG_SIZE); -#endif - rc = kibnal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Voltaire IB LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kibnal_module_init); -module_exit(kibnal_module_fini); - diff --git a/lnet/klnds/viblnd/viblnd.h b/lnet/klnds/viblnd/viblnd.h deleted file mode 100644 index 2a254de718e2903a7f5e8a81dbb2cedbd40c7998..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd.h +++ /dev/null @@ -1,675 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Frank Zago <fzago@systemfabricworks.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/random.h> - -#include <net/sock.h> -#include <linux/in.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -/* CPU_{L,B}E #defines needed by Voltaire headers */ -#include <asm/byteorder.h> -#ifdef __BIG_ENDIAN__ -#define CPU_BE 1 -#define CPU_LE 0 -#endif -#ifdef __LITTLE_ENDIAN__ -#define CPU_BE 0 -#define CPU_LE 1 -#endif - -#include <vverbs.h> -#include <ib-cm.h> -#include <ibat.h> - -/* GCC 3.2.2, miscompiles this driver. - * See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */ -#define GCC_VERSION ((__GNUC__*100 + __GNUC_MINOR__)*100 + __GNUC_PATCHLEVEL__) -#if (GCC_VERSION >= 30000) && (GCC_VERSION < 30203) -# error Invalid GCC version. 
Must use GCC < 3.0.0 || GCC >= 3.2.3 -#endif - -#ifdef CONFIG_SMP -# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define IBNAL_N_SCHED 1 /* # schedulers */ -#endif - -#define IBNAL_USE_FMR 1 - -/* tunables fixed at compile time */ -#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ -#define IBNAL_CREDIT_HIGHWATER 7 /* when eagerly to return credits */ -#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ - -/* constants derived from sdp-connection.c */ -#define IBNAL_QKEY 0 -#define IBNAL_PKEY 0xffff -#define IBNAL_PKEY_IDX 0 -#define IBNAL_SGID_IDX 0 -#define IBNAL_SERVICE_LEVEL 0 -#define IBNAL_STATIC_RATE 0 -#define IBNAL_EE_FLOW_CNT 1 -#define IBNAL_LOCAL_SUB 1 -#define IBNAL_TRAFFIC_CLASS 0 -#define IBNAL_SOURCE_PATH_BIT 0 -#define IBNAL_OUS_DST_RD 1 -#define IBNAL_IB_MTU vv_mtu_1024 - -/* constants derived from sdp-hca-params.h */ -#define PATH_RATE_2_5GB 2 -#define MLX_IPD_1x 1 -#define MLX_IPD_4x 0 -#define IBNAL_R_2_STATIC_RATE(r) ((r) == PATH_RATE_2_5GB ? MLX_IPD_1x : MLX_IPD_4x) - -/* other low-level IB constants */ -#define IBNAL_PKT_LIFETIME 5 -#define IBNAL_ARB_INITIATOR_DEPTH 0 -#define IBNAL_ARB_RESP_RES 0 -#define IBNAL_FAILOVER_ACCEPTED 0 - -/************************/ -/* derived constants... 
*/ - -/* TX messages (shared by all connections) */ -#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx) -#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE) -#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE) - -#if IBNAL_USE_FMR -# define IBNAL_MAX_RDMA_FRAGS 1 -# define IBNAL_CONCURRENT_SENDS IBNAL_RX_MSGS -#else -# define IBNAL_MAX_RDMA_FRAGS LNET_MAX_IOV -# define IBNAL_CONCURRENT_SENDS IBNAL_MSG_QUEUE_SIZE -#endif - -/* RX messages (per connection) */ -#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE*2) -#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE) -#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -#define IBNAL_CQ_ENTRIES() (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) + \ - IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers) - -typedef struct -{ - unsigned int *kib_service_number; /* IB service number */ - int *kib_min_reconnect_interval; /* first failed connection retry... */ - int *kib_max_reconnect_interval; /* ...exponentially increasing to this */ - int *kib_concurrent_peers; /* max # nodes all talking to me */ - int *kib_cksum; /* checksum kib_msg_t? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ - int *kib_arp_retries; /* # times to retry ARP */ - char **kib_hca_basename; /* HCA base name */ - char **kib_ipif_basename; /* IPoIB interface base name */ - int *kib_local_ack_timeout; /* IB RC QP ack timeout... */ - int *kib_retry_cnt; /* ...and retry */ - int *kib_rnr_cnt; /* RNR retries... 
*/ - int *kib_rnr_nak_timer; /* ...and interval */ - int *kib_keepalive; /* keepalive interval */ - int *kib_concurrent_sends; /* send work queue sizing */ -#if IBNAL_USE_FMR - int *kib_fmr_remaps; /* # FMR maps before unmap required */ -#endif -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */ -#endif -} kib_tunables_t; - -typedef struct -{ - int ibp_npages; /* # pages */ - struct page *ibp_pages[0]; -} kib_pages_t; - -#if IBNAL_USE_FMR -typedef struct -{ - vv_fmr_h_t md_fmrhandle; /* FMR handle */ - int md_fmrcount; /* # mappings left */ - int md_active; /* mapping in use? */ - __u32 md_lkey; /* local key */ - __u32 md_rkey; /* remote key */ - __u64 md_addr; /* IO VM address */ -} kib_md_t; -#endif - -typedef struct -{ - int kib_init; /* initialisation state */ - __u64 kib_incarnation; /* which one am I */ - int kib_shutdown; /* shut down? */ - atomic_t kib_nthreads; /* # live threads */ - lnet_ni_t *kib_ni; /* _the_ nal instance */ - - vv_gid_t kib_port_gid; /* device/port GID */ - vv_p_key_t kib_port_pkey; /* device/port pkey */ - - cm_cep_handle_t kib_listen_handle; /* IB listen handle */ - - rwlock_t kib_global_lock; /* stabilize peer/conn ops */ - int kib_ready; /* CQ callback fired */ - int kib_checking_cq; /* a scheduler is checking the CQ */ - - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - atomic_t kib_npeers; /* # peers extant */ - atomic_t kib_nconns; /* # connections extant */ - - void *kib_connd; /* the connd task (serialisation assertions) */ - struct list_head kib_connd_peers; /* peers wanting to get connected */ - struct list_head kib_connd_pcreqs; /* passive connection requests */ - struct list_head kib_connd_conns; /* connections to setup/teardown */ - struct list_head kib_connd_zombies; /* connections with zero refcount */ - wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ - 
spinlock_t kib_connd_lock; /* serialise */ - - wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ - spinlock_t kib_sched_lock; /* serialise */ - - struct kib_tx *kib_tx_descs; /* all the tx descriptors */ - kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ - - struct list_head kib_idle_txs; /* idle tx descriptors */ - __u64 kib_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kib_tx_lock; /* serialise */ - - vv_hca_h_t kib_hca; /* The HCA */ - vv_hca_attrib_t kib_hca_attrs; /* its properties */ - int kib_port; /* port on the device */ - vv_port_attrib_t kib_port_attr; /* its properties */ - - vv_pd_h_t kib_pd; /* protection domain */ - vv_cq_h_t kib_cq; /* completion queue */ - -} kib_data_t; - -#define IBNAL_INIT_NOTHING 0 -#define IBNAL_INIT_DATA 1 -#define IBNAL_INIT_LIB 2 -#define IBNAL_INIT_HCA 3 -#define IBNAL_INIT_ASYNC 4 -#define IBNAL_INIT_PD 5 -#define IBNAL_INIT_TXD 6 -#define IBNAL_INIT_CQ 7 -#define IBNAL_INIT_ALL 8 - -#include "viblnd_wire.h" - -/***********************************************************************/ - -typedef struct kib_rx /* receive message */ -{ - struct list_head rx_list; /* queue for attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - vv_l_key_t rx_lkey; /* local key */ - kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ - vv_wr_t rx_wrq; /* receive work item */ - vv_scatgat_t rx_gl; /* and its memory */ -} kib_rx_t; - -typedef struct kib_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. 
*/ - struct kib_conn *tx_conn; /* owning conn */ - int tx_sending; /* # tx callbacks outstanding */ - int tx_queued; /* queued for sending */ - int tx_waiting; /* waiting for peer */ - int tx_status; /* completion status */ - unsigned long tx_deadline; /* completion deadline */ - __u64 tx_cookie; /* completion cookie */ - lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ - vv_l_key_t tx_lkey; /* local key for message buffer */ - kib_msg_t *tx_msg; /* message buffer (host vaddr) */ - int tx_nwrq; /* # send work items */ -#if IBNAL_USE_FMR - vv_wr_t tx_wrq[2]; /* send work items... */ - vv_scatgat_t tx_gl[2]; /* ...and their memory */ - kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */ - kib_md_t tx_md; /* FMR mapping descriptor */ - __u64 *tx_pages; /* page phys addrs */ -#else - vv_wr_t *tx_wrq; /* send work items... */ - vv_scatgat_t *tx_gl; /* ...and their memory */ - kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */ -#endif -} kib_tx_t; - -/* Passive connection request (listener callback) queued for handling by connd */ -typedef struct kib_pcreq -{ - struct list_head pcr_list; /* queue for handling by connd */ - cm_cep_handle_t pcr_cep; /* listening handle */ - cm_request_data_t pcr_cmreq; /* request data */ -} kib_pcreq_t; - -typedef struct kib_connvars -{ - /* connection-in-progress variables */ - __u32 cv_port; - __u32 cv_pkey_index; - __u32 cv_rnr_count; - __u32 cv_sgid_index; - __u32 cv_remote_qpn; - __u32 cv_local_qpn; - __u32 cv_rxpsn; - __u32 cv_txpsn; - ib_path_record_v2_t cv_path; - ibat_arp_data_t cv_arp; - ibat_stat_t cv_arprc; - cm_conn_data_t cv_conndata; -} kib_connvars_t; - -typedef struct kib_conn -{ - struct kib_peer *ibc_peer; /* owning peer */ - struct list_head ibc_list; /* stash on peer's conn list */ - __u64 ibc_incarnation; /* which instance of the peer */ - __u64 ibc_txseq; /* tx sequence number */ - __u64 ibc_rxseq; /* rx sequence number */ - __u32 ibc_version; /* peer protocol version */ - atomic_t 
ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # credits for ACK/DONE msgs */ - int ibc_disconnect; /* some disconnect callback fired */ - int ibc_comms_error; /* set on comms error */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */ - struct list_head ibc_tx_queue_nocred; /* sends that don't need a cred */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */ - struct list_head ibc_tx_queue; /* send queue */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - vv_qp_h_t ibc_qp; /* queue pair */ - cm_cep_handle_t ibc_cep; /* connection endpoint */ - kib_connvars_t *ibc_connvars; /* in-progress connection state */ -} kib_conn_t; - -#define IBNAL_CONN_INIT_NOTHING 0 /* incomplete init */ -#define IBNAL_CONN_INIT_QP 1 /* QP allocated */ -#define IBNAL_CONN_INIT 2 /* completed init */ -#define IBNAL_CONN_ACTIVE_ARP 3 /* active arping */ -#define IBNAL_CONN_ACTIVE_CONNECT 4 /* active sending req */ -#define IBNAL_CONN_ACTIVE_CHECK_REPLY 5 /* active checking reply */ -#define IBNAL_CONN_ACTIVE_RTU 6 /* active sending rtu */ -#define IBNAL_CONN_PASSIVE_WAIT 7 /* passive waiting for rtu */ -#define IBNAL_CONN_ESTABLISHED 8 /* connection established */ -#define IBNAL_CONN_DISCONNECT1 9 /* disconnect phase 1 */ -#define IBNAL_CONN_DISCONNECT2 10 /* disconnect phase 2 */ -#define IBNAL_CONN_DISCONNECTED 11 /* disconnect complete */ - -typedef struct kib_peer -{ - struct list_head ibp_list; /* stash on global peer list */ - struct list_head ibp_connd_list; /* schedule on kib_connd_peers */ - lnet_nid_t ibp_nid; /* who's on 
the other end(s) */ - __u32 ibp_ip; /* IP to query for peer conn params */ - int ibp_port; /* port to qery for peer conn params */ - __u64 ibp_incarnation; /* peer's incarnation */ - atomic_t ibp_refcount; /* # users */ - int ibp_persistence; /* "known" peer refs */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - int ibp_connecting; /* current active connection attempts */ - int ibp_accepting; /* current passive connection attempts */ - int ibp_arp_count; /* # arp attempts */ - unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ - unsigned long ibp_reconnect_interval; /* exponential backoff */ - int ibp_error; /* errno on closing this peer */ - cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */ -} kib_peer_t; - - -extern kib_data_t kibnal_data; -extern kib_tunables_t kibnal_tunables; - -int kibnal_startup (lnet_ni_t *ni); -void kibnal_shutdown (lnet_ni_t *ni); -int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -extern int kibnal_eager_recv (lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kibnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); -extern void kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq); -extern int kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob); -extern int kibnal_create_peer(kib_peer_t **peerp, lnet_nid_t nid); -extern void kibnal_destroy_peer(kib_peer_t *peer); -extern int kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip); -extern int kibnal_del_peer(lnet_nid_t nid); -extern kib_peer_t *kibnal_find_peer_locked(lnet_nid_t nid); -extern 
void kibnal_unlink_peer_locked(kib_peer_t *peer); -extern void kibnal_peer_alive(kib_peer_t *peer); -extern int kibnal_close_stale_conns_locked(kib_peer_t *peer, - __u64 incarnation); -extern kib_conn_t *kibnal_create_conn(cm_cep_handle_t cep); -extern void kibnal_listen_callback(cm_cep_handle_t cep, cm_conn_data_t *info, void *arg); - -extern int kibnal_alloc_pages(kib_pages_t **pp, int npages, int access); -extern void kibnal_free_pages(kib_pages_t *p); - -extern void kibnal_check_sends(kib_conn_t *conn); -extern void kibnal_close_conn_locked(kib_conn_t *conn, int error); -extern void kibnal_destroy_conn(kib_conn_t *conn); -extern int kibnal_thread_start(int (*fn)(void *arg), void *arg); -extern int kibnal_scheduler(void *arg); -extern int kibnal_connd(void *arg); -extern void kibnal_init_tx_msg(kib_tx_t *tx, int type, int body_nob); -extern void kibnal_close_conn(kib_conn_t *conn, int why); -extern int kibnal_set_qp_state(kib_conn_t *conn, vv_qp_state_t new_state); -extern void kibnal_async_callback(vv_event_record_t ev); -extern void kibnal_cq_callback(unsigned long context); -extern void kibnal_passive_connreq(kib_pcreq_t *pcr, int reject); -extern void kibnal_txlist_done (struct list_head *txlist, int status); -extern void kibnal_queue_tx(kib_tx_t *tx, kib_conn_t *conn); -extern int kibnal_init_rdma(kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie); -extern int kibnal_tunables_init(void); -extern void kibnal_tunables_fini(void); - -#define kibnal_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kibnal_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - 
spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kibnal_data.kib_connd_zombies); \ - wake_up(&kibnal_data.kib_connd_waitq); \ - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); \ - } \ -} while (0) - -#define kibnal_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kibnal_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kibnal_destroy_peer(peer); \ -} while (0) - -static inline struct list_head * -kibnal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - - return (&kibnal_data.kib_peers [hash]); -} - -static inline int -kibnal_peer_active (kib_peer_t *peer) -{ - /* Am I in the peer hash table? 
*/ - return (!list_empty(&peer->ibp_list)); -} - -static inline void -kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nwrq > 0); /* work items set up */ - LASSERT (!tx->tx_queued); /* not queued for sending already */ - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ); - - if (tx->tx_conn == NULL) { - kibnal_conn_addref(conn); - tx->tx_conn = conn; - LASSERT (tx->tx_msg->ibm_type != IBNAL_MSG_PUT_DONE); - } else { - LASSERT (tx->tx_conn == conn); - LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE); - } - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* All messages have simple credit control */ - q = &conn->ibc_tx_queue; - } else { - LASSERT (conn->ibc_version == IBNAL_MSG_VERSION); - - switch (tx->tx_msg->ibm_type) { - case IBNAL_MSG_PUT_REQ: - case IBNAL_MSG_GET_REQ: - /* RDMA request: reserve a buffer for the RDMA reply - * before sending */ - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_ACK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - /* RDMA reply/completion: no credits; peer has reserved - * a reply buffer */ - q = &conn->ibc_tx_queue_nocred; - break; - - case IBNAL_MSG_NOOP: - case IBNAL_MSG_IMMEDIATE: - /* Otherwise: consume a credit before sending */ - q = &conn->ibc_tx_queue; - break; - - default: - LBUG(); - q = NULL; - } - } - - list_add_tail(&tx->tx_list, q); -} - -static inline int -kibnal_send_keepalive(kib_conn_t *conn) -{ - return (*kibnal_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - *kibnal_tunables.kib_keepalive*HZ); -} - -#ifndef IBNAL_VOIDSTAR_SGADDR -# define IBNAL_VOIDSTAR_SGADDR 0 -#endif - -#if IBNAL_VOIDSTAR_SGADDR -# if defined(CONFIG_HIGHMEM) -# if defined(CONFIG_X86) && defined(CONFIG_HIGHMEM4G) - /* truncation to void* doesn't matter if 0 <= physmem < 4G - * so allow x86 with 32 bit phys addrs */ -# elif defined(CONFIG_IA64) - /* OK 
anyway on 64-bit arch */ -# else -# error "Can't support HIGHMEM when vv_scatgat_t::v_address is void *" -# endif -# endif -# define KIBNAL_ADDR2SG(a) ((void *)((unsigned long)(a))) -# define KIBNAL_SG2ADDR(a) ((__u64)((unsigned long)(a))) -static inline __u64 kibnal_addr2net (__u64 addr) -{ - void *netaddr; - vv_return_t vvrc = vv_va2advertise_addr(kibnal_data.kib_hca, - KIBNAL_ADDR2SG(addr), - &netaddr); - LASSERT (vvrc == vv_return_ok); - return KIBNAL_SG2ADDR(netaddr); -} -#else -# define KIBNAL_ADDR2SG(a) a -# define KIBNAL_SG2ADDR(a) a -static inline __u64 kibnal_addr2net (__u64 addr) -{ - __u64 netaddr; - vv_return_t vvrc = vv_va2advertise_addr(kibnal_data.kib_hca, - addr, - &netaddr); - LASSERT (vvrc == vv_return_ok); - return netaddr; -} -#endif - -/* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the - * lowest 2 bits of the work request id to stash the work item type (the op - * field is not valid when the wc completes in error). */ - -#define IBNAL_WID_TX 0 -#define IBNAL_WID_RX 1 -#define IBNAL_WID_RDMA 2 -#define IBNAL_WID_MASK 3UL - -static inline vv_wr_id_t -kibnal_ptr2wreqid (void *ptr, int type) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT ((lptr & IBNAL_WID_MASK) == 0); - LASSERT ((type & ~IBNAL_WID_MASK) == 0); - return (vv_wr_id_t)(lptr | type); -} - -static inline void * -kibnal_wreqid2ptr (vv_wr_id_t wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~IBNAL_WID_MASK); -} - -static inline int -kibnal_wreqid2type (vv_wr_id_t wreqid) -{ - return (wreqid & IBNAL_WID_MASK); -} - -static inline void -kibnal_set_conn_state (kib_conn_t *conn, int state) -{ - conn->ibc_state = state; - mb(); -} - -#if IBNAL_USE_FMR - -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - return rd->rd_nob; -} - -#else -static inline __u64 -kibnal_rf_addr (kib_rdma_frag_t *rf) -{ - return (((__u64)rf->rf_addr_hi)<<32) | ((__u64)rf->rf_addr_lo); -} - -static inline void -kibnal_rf_set (kib_rdma_frag_t *rf, __u64 addr, int 
nob) -{ - rf->rf_addr_lo = addr & 0xffffffff; - rf->rf_addr_hi = (addr >> 32) & 0xffffffff; - rf->rf_nob = nob; -} - -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - int i; - int size; - - for (i = size = 0; i < rd->rd_nfrag; i++) - size += rd->rd_frags[i].rf_nob; - - return size; -} -#endif diff --git a/lnet/klnds/viblnd/viblnd_cb.c b/lnet/klnds/viblnd/viblnd_cb.c deleted file mode 100644 index d468673b3a3ac9ffcb666b4921801e90ab4a2dc5..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd_cb.c +++ /dev/null @@ -1,3681 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Frank Zago <fzago@systemfabricworks.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "viblnd.h" - -void -kibnal_tx_done (kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - int rc = tx->tx_status; - int i; - - LASSERT (!in_interrupt()); - LASSERT (!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */ - LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */ - -#if IBNAL_USE_FMR - if (tx->tx_md.md_fmrcount == 0 || - (rc != 0 && tx->tx_md.md_active)) { - vv_return_t vvrc; - - /* mapping must be active (it dropped fmrcount to 0) */ - LASSERT (tx->tx_md.md_active); - - vvrc = vv_unmap_fmr(kibnal_data.kib_hca, - 1, &tx->tx_md.md_fmrhandle); - LASSERT (vvrc == vv_return_ok); - - tx->tx_md.md_fmrcount = *kibnal_tunables.kib_fmr_remaps; - } - tx->tx_md.md_active = 0; -#endif - - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - - if (tx->tx_conn != NULL) { - kibnal_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - spin_lock(&kibnal_data.kib_tx_lock); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - spin_unlock(&kibnal_data.kib_tx_lock); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) - continue; - - lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc); - } -} - -void -kibnal_txlist_done (struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kibnal_tx_done (tx); - } -} - -kib_tx_t * -kibnal_get_idle_tx (void) -{ - kib_tx_t *tx; - - spin_lock(&kibnal_data.kib_tx_lock); - - if (list_empty (&kibnal_data.kib_idle_txs)) { - spin_unlock(&kibnal_data.kib_tx_lock); - return NULL; - } - - tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list); - 
list_del (&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... */ - tx->tx_cookie = kibnal_data.kib_next_tx_cookie++; - - spin_unlock(&kibnal_data.kib_tx_lock); - - LASSERT (tx->tx_nwrq == 0); - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending == 0); - LASSERT (!tx->tx_waiting); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -int -kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) -{ - kib_conn_t *conn = rx->rx_conn; - int rc = 0; - __u64 addr = (__u64)((unsigned long)((rx)->rx_msg)); - vv_return_t vvrc; - - LASSERT (!in_interrupt()); - /* old peers don't reserve rxs for RDMA replies */ - LASSERT (!rsrvd_credit || - conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - - rx->rx_gl = (vv_scatgat_t) { - .v_address = KIBNAL_ADDR2SG(addr), - .l_key = rx->rx_lkey, - .length = IBNAL_MSG_SIZE, - }; - - rx->rx_wrq = (vv_wr_t) { - .wr_id = kibnal_ptr2wreqid(rx, IBNAL_WID_RX), - .completion_notification = 1, - .scatgat_list = &rx->rx_gl, - .num_of_data_segments = 1, - .wr_type = vv_wr_receive, - }; - - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT); - LASSERT (rx->rx_nob >= 0); /* not posted */ - - CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n", - rx->rx_wrq.scatgat_list->length, - rx->rx_wrq.scatgat_list->l_key, - KIBNAL_SG2ADDR(rx->rx_wrq.scatgat_list->v_address)); - - if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) { - /* No more posts for this rx; so lose its ref */ - kibnal_conn_decref(conn); - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - - spin_lock(&conn->ibc_lock); - /* Serialise vv_post_receive; it's not re-entrant on the same QP */ - vvrc = vv_post_receive(kibnal_data.kib_hca, - conn->ibc_qp, &rx->rx_wrq); - - if (vvrc == vv_return_ok) { - if (credit) - conn->ibc_outstanding_credits++; - if (rsrvd_credit) - conn->ibc_reserved_credits++; 
- - spin_unlock(&conn->ibc_lock); - - if (credit || rsrvd_credit) - kibnal_check_sends(conn); - - return 0; - } - - spin_unlock(&conn->ibc_lock); - - CERROR ("post rx -> %s failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc); - rc = -EIO; - kibnal_close_conn(rx->rx_conn, rc); - /* No more posts for this rx; so lose its ref */ - kibnal_conn_decref(conn); - return rc; -} - -int -kibnal_post_receives (kib_conn_t *conn) -{ - int i; - int rc; - - LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED); - LASSERT (conn->ibc_comms_error == 0); - - for (i = 0; i < IBNAL_RX_MSGS; i++) { - /* +1 ref for rx desc. This ref remains until kibnal_post_rx - * fails (i.e. actual failure or we're disconnecting) */ - kibnal_conn_addref(conn); - rc = kibnal_post_rx (&conn->ibc_rxs[i], 0, 0); - if (rc != 0) - return rc; - } - - return 0; -} - -kib_tx_t * -kibnal_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending != 0 || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted %x)\n", - tx->tx_waiting ? "" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -void -kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) -{ - kib_tx_t *tx; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kibnal_find_waiting_tx_locked(conn, txtype, cookie); - if (tx == NULL) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie "LPX64" from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_close_conn (conn, -EPROTO); - return; - } - - if (tx->tx_status == 0) { /* success so far */ - if (status < 0) { /* failed? 
*/ - tx->tx_status = status; - } else if (txtype == IBNAL_MSG_GET_REQ) { - lnet_set_reply_msg_len(kibnal_data.kib_ni, - tx->tx_lntmsg[1], status); - } - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && (tx->tx_sending == 0); - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done(tx); -} - -void -kibnal_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie) -{ - kib_tx_t *tx = kibnal_get_idle_tx(); - - if (tx == NULL) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kibnal_init_tx_msg(tx, type, sizeof(kib_completion_msg_t)); - - kibnal_queue_tx(tx, conn); -} - -void -kibnal_handle_rx (kib_rx_t *rx) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - int credits = msg->ibm_credits; - kib_tx_t *tx; - int rc = 0; - int repost = 1; - int rsrvd_credit = 0; - int rc2; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - CDEBUG (D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - if (credits != 0) { - /* Have I received credits that will let me send? 
*/ - spin_lock(&conn->ibc_lock); - conn->ibc_credits += credits; - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBNAL message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_NAK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_handle_completion(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_PUT_ACK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - spin_lock(&conn->ibc_lock); - tx = kibnal_find_waiting_tx_locked(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx != NULL) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (tx == NULL) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT (tx->tx_waiting); - /* CAVEAT EMPTOR: I could be racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. 
*/ - - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kibnal_init_rdma(tx, IBNAL_MSG_PUT_DONE, - kibnal_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - if (tx->tx_status == 0 && rc2 < 0) - tx->tx_status = rc2; - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kibnal_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBNAL_MSG_PUT_DONE: - /* This buffer was pre-reserved by not returning the credit - * when the PUT_REQ's buffer was reposted, so I just return it - * now */ - kibnal_handle_completion(conn, IBNAL_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_GET_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_GET_DONE: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - kibnal_handle_completion(conn, IBNAL_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kibnal_close_conn(conn, rc); - - if (repost) { - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - rsrvd_credit = 0; /* peer isn't pre-reserving */ - - kibnal_post_rx(rx, !rsrvd_credit, rsrvd_credit); - } -} - -void -kibnal_rx_complete (kib_rx_t *rx, vv_comp_status_t vvrc, int nob, __u64 rxseq) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - unsigned long flags; - int rc; - - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - - if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) - goto ignore; - - if (vvrc != vv_comp_status_success) { - CERROR("Rx from %s failed: %d\n", - 
libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc); - goto failed; - } - - rc = kibnal_unpack_msg(msg, conn->ibc_version, nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - rx->rx_nob = nob; /* Can trust 'nob' now */ - - if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid, - msg->ibm_srcnid) || - !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (msg->ibm_seq != rxseq) { - CERROR ("Out-of-sequence rx from %s" - ": got "LPD64" but expected "LPD64"\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - msg->ibm_seq, rxseq); - goto failed; - } - - /* set time last known alive */ - kibnal_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! */ - - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return; - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - } - kibnal_handle_rx(rx); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kibnal_close_conn(conn, -EIO); - ignore: - /* Don't re-post rx & drop its ref on conn */ - kibnal_conn_decref(conn); -} - -struct page * -kibnal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) { - page = vmalloc_to_page ((void *)vaddr); - LASSERT (page != NULL); - return page; - } -#ifdef CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk 
(kiov) I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page (vaddr); - LASSERT (page != NULL); - return page; -} - -#if !IBNAL_USE_FMR -int -kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page, - unsigned long page_offset, unsigned long len) -{ - kib_rdma_frag_t *frag = &rd->rd_frags[rd->rd_nfrag]; - vv_l_key_t l_key; - vv_r_key_t r_key; - __u64 addr; - __u64 frag_addr; - vv_mem_reg_h_t mem_h; - vv_return_t vvrc; - - if (rd->rd_nfrag >= IBNAL_MAX_RDMA_FRAGS) { - CERROR ("Too many RDMA fragments\n"); - return -EMSGSIZE; - } - - /* Try to create an address that adaptor-tavor will munge into a valid - * network address, given how it maps all phys mem into 1 region */ - addr = lnet_page2phys(page) + page_offset + PAGE_OFFSET; - - /* NB this relies entirely on there being a single region for the whole - * of memory, since "high" memory will wrap in the (void *) cast! */ - vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, - (void *)((unsigned long)addr), - len, &mem_h, &l_key, &r_key); - LASSERT (vvrc == vv_return_ok); - - if (active) { - if (rd->rd_nfrag == 0) { - rd->rd_key = l_key; - } else if (l_key != rd->rd_key) { - CERROR ("> 1 key for single RDMA desc\n"); - return -EINVAL; - } - frag_addr = addr; - } else { - if (rd->rd_nfrag == 0) { - rd->rd_key = r_key; - } else if (r_key != rd->rd_key) { - CERROR ("> 1 key for single RDMA desc\n"); - return -EINVAL; - } - - frag_addr = kibnal_addr2net(addr); - } - - kibnal_rf_set(frag, frag_addr, len); - - CDEBUG(D_NET,"map frag [%d][%d %x %08x%08x] "LPX64"\n", - rd->rd_nfrag, frag->rf_nob, rd->rd_key, - frag->rf_addr_hi, frag->rf_addr_lo, frag_addr); - - rd->rd_nfrag++; - return 0; -} - -int -kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - unsigned int niov, struct iovec *iov, int offset, int nob) -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int fragnob; - int rc; - unsigned 
long vaddr; - struct page *page; - int page_offset; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR ("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - rc = kibnal_append_rdfrag(rd, active, page, - page_offset, fragnob); - if (rc != 0) - return rc; - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - return 0; -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int fragnob; - int rc; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (nkiov > 0); - fragnob = min((int)(kiov->kiov_len - offset), nob); - - rc = kibnal_append_rdfrag(rd, active, kiov->kiov_page, - kiov->kiov_offset + offset, - fragnob); - if (rc != 0) - return rc; - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - return 0; -} -#else -int -kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int npages, unsigned long page_offset, int nob) -{ - vv_return_t vvrc; - vv_fmr_map_t map_props; - - LASSERT ((rd != tx->tx_rd) == !active); - LASSERT 
(!tx->tx_md.md_active); - LASSERT (tx->tx_md.md_fmrcount > 0); - LASSERT (page_offset < PAGE_SIZE); - LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT))); - LASSERT (npages <= LNET_MAX_IOV); - - memset(&map_props, 0, sizeof(map_props)); - - map_props.start = (void *)page_offset; - map_props.size = nob; - map_props.page_array_len = npages; - map_props.page_array = tx->tx_pages; - - vvrc = vv_map_fmr(kibnal_data.kib_hca, tx->tx_md.md_fmrhandle, - &map_props, &tx->tx_md.md_lkey, &tx->tx_md.md_rkey); - if (vvrc != vv_return_ok) { - CERROR ("Can't map vaddr %p for %d in %d pages: %d\n", - map_props.start, nob, npages, vvrc); - return -EFAULT; - } - - tx->tx_md.md_addr = (unsigned long)map_props.start; - tx->tx_md.md_active = 1; - tx->tx_md.md_fmrcount--; - - rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey; - rd->rd_nob = nob; - rd->rd_addr = tx->tx_md.md_addr; - - /* Compensate for adaptor-tavor's munging of gatherlist addresses */ - if (active) - rd->rd_addr += PAGE_OFFSET; - - return 0; -} - -int -kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - unsigned int niov, struct iovec *iov, int offset, int nob) -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int resid; - int fragnob; - struct page *page; - int npages; - unsigned long page_offset; - unsigned long vaddr; - - LASSERT (nob > 0); - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = ((unsigned long)iov->iov_base) + offset; - - page_offset = vaddr & (PAGE_SIZE - 1); - resid = nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR("Can't find page for %lu\n", vaddr); - return -EFAULT; - } - - tx->tx_pages[npages++] = 
lnet_page2phys(page); - - fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1)); - vaddr += fragnob; - resid -= fragnob; - - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int resid; - int npages; - unsigned long page_offset; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (nkiov <= LNET_MAX_IOV); - LASSERT (!tx->tx_md.md_active); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - page_offset = kiov->kiov_offset + offset; - - resid = offset + nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - LASSERT (nkiov > 0); - - if ((npages > 0 && kiov->kiov_offset != 0) || - (resid > kiov->kiov_len && - (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) { - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, len %d \n", - npages, kiov->kiov_offset, kiov->kiov_len); - - return -EINVAL; - } - - tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page); - resid -= kiov->kiov_len; - kiov++; - nkiov--; - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} -#endif - -kib_conn_t * -kibnal_find_conn_locked (kib_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->ibp_conns) { - return (list_entry(tmp, kib_conn_t, ibc_list)); - } - - return (NULL); -} - -void -kibnal_check_sends (kib_conn_t *conn) -{ - kib_tx_t *tx; - vv_return_t vvrc; - int rc; - int consume_cred; - int done; - - /* Don't send anything until after the connection is established */ - if (conn->ibc_state 
< IBNAL_CONN_ESTABLISHED) { - CDEBUG(D_NET, "%s too soon\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - spin_lock(&conn->ibc_lock); - - LASSERT (conn->ibc_nsends_posted <= - *kibnal_tunables.kib_concurrent_sends); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_nocred) && - (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER || - kibnal_send_keepalive(conn))) { - spin_unlock(&conn->ibc_lock); - - tx = kibnal_get_idle_tx(); - if (tx != NULL) - kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - - if (tx != NULL) - kibnal_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry (conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_cred = 0; - } else if (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_cred = 1; - } else { - /* nothing waiting */ - break; - } - - LASSERT (tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); - - if (conn->ibc_nsends_posted == - *kibnal_tunables.kib_concurrent_sends) { - /* We've got some tx completions outstanding... 
*/ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (consume_cred) { - if (conn->ibc_credits == 0) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) { /* giving back credits */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - } - - list_del (&tx->tx_list); - tx->tx_queued = 0; - - /* NB don't drop ibc_lock before bumping tx_sending */ - - if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP && - (!list_empty(&conn->ibc_tx_queue) || - !list_empty(&conn->ibc_tx_queue_nocred) || - (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER && - !kibnal_send_keepalive(conn)))) { - /* redundant NOOP */ - spin_unlock(&conn->ibc_lock); - kibnal_tx_done(tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - continue; - } - - kibnal_pack_msg(tx->tx_msg, conn->ibc_version, - conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation, - conn->ibc_txseq); - - conn->ibc_txseq++; - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_cred) - conn->ibc_credits--; - - /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() from - * the first send; hence the ++ rather than = below. */ - tx->tx_sending++; - - list_add (&tx->tx_list, &conn->ibc_active_txs); - - /* Keep holding ibc_lock while posting sends on this - * connection; vv_post_send() isn't re-entrant on the same - * QP!! 
*/ - - LASSERT (tx->tx_nwrq > 0); -#if 0 - if (tx->tx_wrq[0].wr_type == vv_wr_rdma_write) - CDEBUG(D_NET, "WORK[0]: RDMA gl %p for %d k %x -> "LPX64" k %x\n", - tx->tx_wrq[0].scatgat_list->v_address, - tx->tx_wrq[0].scatgat_list->length, - tx->tx_wrq[0].scatgat_list->l_key, - tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_addr, - tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_r_key); - else - CDEBUG(D_NET, "WORK[0]: %s gl %p for %d k %x\n", - tx->tx_wrq[0].wr_type == vv_wr_send ? "SEND" : "????", - tx->tx_wrq[0].scatgat_list->v_address, - tx->tx_wrq[0].scatgat_list->length, - tx->tx_wrq[0].scatgat_list->l_key); - - if (tx->tx_nwrq > 1) { - if (tx->tx_wrq[1].wr_type == vv_wr_rdma_write) - CDEBUG(D_NET, "WORK[1]: RDMA gl %p for %d k %x -> "LPX64" k %x\n", - tx->tx_wrq[1].scatgat_list->v_address, - tx->tx_wrq[1].scatgat_list->length, - tx->tx_wrq[1].scatgat_list->l_key, - tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_addr, - tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_r_key); - else - CDEBUG(D_NET, "WORK[1]: %s gl %p for %d k %x\n", - tx->tx_wrq[1].wr_type == vv_wr_send ? "SEND" : "????", - tx->tx_wrq[1].scatgat_list->v_address, - tx->tx_wrq[1].scatgat_list->length, - tx->tx_wrq[1].scatgat_list->l_key); - } -#endif - rc = -ECONNABORTED; - vvrc = vv_return_ok; - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - tx->tx_status = 0; - vvrc = vv_post_send_list(kibnal_data.kib_hca, - conn->ibc_qp, - tx->tx_nwrq, - tx->tx_wrq, - vv_operation_type_send_rc); - rc = (vvrc == vv_return_ok) ? 
0 : -EIO; - } - - conn->ibc_last_send = jiffies; - - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_cred) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CERROR ("Error %d posting transmit to %s\n", - vvrc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG (D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_close_conn (conn, rc); - - if (done) - kibnal_tx_done (tx); - return; - } - } - - spin_unlock(&conn->ibc_lock); -} - -void -kibnal_tx_complete (kib_tx_t *tx, vv_comp_status_t vvrc) -{ - kib_conn_t *conn = tx->tx_conn; - int failed = (vvrc != vv_comp_status_success); - int idle; - - CDEBUG(D_NET, "tx %p conn %p sending %d nwrq %d vvrc %d\n", - tx, conn, tx->tx_sending, tx->tx_nwrq, vvrc); - - LASSERT (tx->tx_sending > 0); - - if (failed && - tx->tx_status == 0 && - conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CDEBUG(D_NETERROR, "tx -> %s type %x cookie "LPX64 - "sending %d waiting %d: failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_msg->ibm_type, tx->tx_cookie, - tx->tx_sending, tx->tx_waiting, vvrc); - - spin_lock(&conn->ibc_lock); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. 
*/ - - tx->tx_sending--; - conn->ibc_nsends_posted--; - - if (failed) { - tx->tx_waiting = 0; - tx->tx_status = -EIO; - } - - idle = (tx->tx_sending == 0) && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kibnal_conn_addref(conn); /* 1 ref for me.... */ - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done (tx); - - if (failed) { - kibnal_close_conn (conn, -EIO); - } else { - kibnal_peer_alive(conn->ibc_peer); - kibnal_check_sends(conn); - } - - kibnal_conn_decref(conn); /* ...until here */ -} - -void -kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) -{ - vv_scatgat_t *gl = &tx->tx_gl[tx->tx_nwrq]; - vv_wr_t *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - __u64 addr = (__u64)((unsigned long)((tx)->tx_msg)); - - LASSERT (tx->tx_nwrq >= 0 && - tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS)); - LASSERT (nob <= IBNAL_MSG_SIZE); - - kibnal_init_msg(tx->tx_msg, type, body_nob); - - *gl = (vv_scatgat_t) { - .v_address = KIBNAL_ADDR2SG(addr), - .l_key = tx->tx_lkey, - .length = nob, - }; - - memset(wrq, 0, sizeof(*wrq)); - - wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_TX); - wrq->wr_type = vv_wr_send; - wrq->scatgat_list = gl; - wrq->num_of_data_segments = 1; - wrq->completion_notification = 1; - wrq->type.send.solicited_event = 1; - wrq->type.send.immidiate_data_indicator = 0; - wrq->type.send.send_qp_type.rc_type.fance_indicator = 0; - - tx->tx_nwrq++; -} - -int -kibnal_init_rdma (kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie) -{ - kib_msg_t *ibmsg = tx->tx_msg; - kib_rdma_desc_t *srcrd = tx->tx_rd; - vv_scatgat_t *gl; - vv_wr_t *wrq; - int rc; - -#if IBNAL_USE_FMR - LASSERT (tx->tx_nwrq == 0); - - gl = &tx->tx_gl[0]; - gl->length = nob; - gl->v_address = KIBNAL_ADDR2SG(srcrd->rd_addr); - gl->l_key = srcrd->rd_key; - - wrq = &tx->tx_wrq[0]; - - wrq->wr_id = 
kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->completion_notification = 0; - wrq->scatgat_list = gl; - wrq->num_of_data_segments = 1; - wrq->wr_type = vv_wr_rdma_write; - wrq->type.send.solicited_event = 0; - wrq->type.send.send_qp_type.rc_type.fance_indicator = 0; - wrq->type.send.send_qp_type.rc_type.r_addr = dstrd->rd_addr; - wrq->type.send.send_qp_type.rc_type.r_r_key = dstrd->rd_key; - - tx->tx_nwrq = 1; - rc = nob; -#else - /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */ - int resid = nob; - kib_rdma_frag_t *srcfrag; - int srcidx; - kib_rdma_frag_t *dstfrag; - int dstidx; - int wrknob; - - /* Called by scheduler */ - LASSERT (!in_interrupt()); - - LASSERT (type == IBNAL_MSG_GET_DONE || - type == IBNAL_MSG_PUT_DONE); - - srcidx = dstidx = 0; - srcfrag = &srcrd->rd_frags[0]; - dstfrag = &dstrd->rd_frags[0]; - rc = resid; - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrag) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrag) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq == IBNAL_MAX_RDMA_FRAGS) { - CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n", - srcidx, srcrd->rd_nfrag, - dstidx, dstrd->rd_nfrag); - rc = -EMSGSIZE; - break; - } - - wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid); - - gl = &tx->tx_gl[tx->tx_nwrq]; - gl->v_address = KIBNAL_ADDR2SG(kibnal_rf_addr(srcfrag)); - gl->length = wrknob; - gl->l_key = srcrd->rd_key; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - - wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->completion_notification = 0; - wrq->scatgat_list = gl; - wrq->num_of_data_segments = 1; - wrq->wr_type = vv_wr_rdma_write; - wrq->type.send.solicited_event = 0; - wrq->type.send.send_qp_type.rc_type.fance_indicator = 0; - wrq->type.send.send_qp_type.rc_type.r_addr = kibnal_rf_addr(dstfrag); - wrq->type.send.send_qp_type.rc_type.r_r_key = dstrd->rd_key; - - resid -= wrknob; - if (wrknob 
< srcfrag->rf_nob) { - kibnal_rf_set(srcfrag, - kibnal_rf_addr(srcfrag) + wrknob, - srcfrag->rf_nob - wrknob); - } else { - srcfrag++; - srcidx++; - } - - if (wrknob < dstfrag->rf_nob) { - kibnal_rf_set(dstfrag, - kibnal_rf_addr(dstfrag) + wrknob, - dstfrag->rf_nob - wrknob); - } else { - dstfrag++; - dstidx++; - } - - tx->tx_nwrq++; - } - - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; -#endif - - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t)); - - return rc; -} - -void -kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - spin_lock(&conn->ibc_lock); - kibnal_queue_tx_locked (tx, conn); - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); -} - -void -kibnal_schedule_peer_arp (kib_peer_t *peer) -{ - unsigned long flags; - - LASSERT (peer->ibp_connecting != 0); - LASSERT (peer->ibp_arp_count > 0); - - kibnal_peer_addref(peer); /* extra ref for connd */ - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); -} - -void -kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_conn_t *conn; - unsigned long flags; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - int retry; - int rc; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nwrq > 0); /* work items have been set up */ - - for (retry = 0; ; retry = 1) { - read_lock_irqsave(g_lock, flags); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) { - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - kibnal_conn_addref(conn); /* 1 ref for me... 
*/ - read_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...to here */ - return; - } - } - - /* Making one or more connections; I'll need a write lock... */ - read_unlock(g_lock); - write_lock(g_lock); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) - break; - - write_unlock_irqrestore(g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - rc = kibnal_add_persistent_peer(nid, LNET_NIDADDR(nid)); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - } - - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kibnal_conn_addref(conn); /* 1 ref for me... */ - write_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - - if (peer->ibp_connecting == 0 && - peer->ibp_accepting == 0) { - if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->ibp_reconnect_time))) { - write_unlock_irqrestore(g_lock, flags); - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - peer->ibp_connecting = 1; - peer->ibp_arp_count = 1 + *kibnal_tunables.kib_arp_retries; - kibnal_schedule_peer_arp(peer); - } - - /* A connection is being established; queue the message... 
*/ - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - - write_unlock_irqrestore(g_lock, flags); -} - -int -kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... */ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? 
*/ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can allocate txd for GET to %s: \n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd, - vv_acc_r_mem_write, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd, - vv_acc_r_mem_write, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - -#if IBNAL_USE_FMR - nob = sizeof(kib_get_msg_t); -#else - { - int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag; - - nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni, - lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kibnal_tx_done(tx); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kibnal_launch_tx(tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? 
"PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (payload_kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 0, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc != 0) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kibnal_launch_tx(tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBNAL_MSG_SIZE); - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_iov, - payload_offset, payload_nob); - - nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]); - kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kibnal_launch_tx(tx, target.nid); - return 0; -} - -void -kibnal_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) -{ - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = 
lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kib_tx_t *tx; - int rc; - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (nob == 0) - rc = 0; - else if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0, - niov, iov, offset, nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 0, - niov, kiov, offset, nob); - - if (rc != 0) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (rc == 0) { - /* No RDMA: local completion may happen now! */ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it - * completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kibnal_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kibnal_tx_done(tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void **new_private) -{ - kib_rx_t *rx = private; - kib_conn_t *conn = rx->rx_conn; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* Can't block if RDMA completions need normal credits */ - LCONSOLE_ERROR_MSG(0x129, "Dropping message from %s: no buffers" - " free. 
%s is running an old version of LNET " - "that may deadlock if messages wait for" - "buffers) \n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return -EDEADLK; - } - - *new_private = private; - return 0; -} - -int -kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - kib_tx_t *tx; - kib_msg_t *txmsg; - int nob; - int post_cred = 1; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBNAL_MSG_IMMEDIATE: - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - lnet_finalize (ni, lntmsg, 0); - break; - - case IBNAL_MSG_PUT_REQ: - if (mlen == 0) { - lnet_finalize(ni, lntmsg, 0); - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - vv_acc_r_mem_write, - niov, iov, offset, mlen); - 
else - rc = kibnal_setup_rd_kiov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - vv_acc_r_mem_write, - niov, kiov, offset, mlen); - if (rc != 0) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_tx_done(tx); - /* tell peer it's over */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; -#if IBNAL_USE_FMR - nob = sizeof(kib_putack_msg_t); -#else - { - int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag; - - nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kibnal_queue_tx(tx, conn); - - if (conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - post_cred = 0; /* peer still owns 'rx' for sending PUT_DONE */ - break; - - case IBNAL_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Optimized GET; RDMA lntmsg's payload */ - kibnal_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kibnal_post_rx(rx, post_cred, 0); - return rc; -} - -int -kibnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kibnal_data.kib_nthreads); - return (0); -} - -void -kibnal_thread_fini (void) -{ - atomic_dec (&kibnal_data.kib_nthreads); -} - -void -kibnal_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kibnal_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - 
read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive); -} - -void -kibnal_schedule_conn (kib_conn_t *conn) -{ - unsigned long flags; - - kibnal_conn_addref(conn); /* ++ref for connd */ - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); -} - -void -kibnal_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immediate housekeeping. 'error' is zero for a - * normal shutdown which can happen only after the connection has been - * established. If the connection is established, schedule the - * connection to be finished off by the connd. Otherwise the connd is - * already dealing with it (either to set it up or tear it down). 
- * Caller holds kib_global_lock exclusively in irq context */ - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (error != 0 || conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - if (error != 0 && conn->ibc_comms_error == 0) - conn->ibc_comms_error = error; - - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) - return; /* already being handled */ - - /* NB Can't take ibc_lock here (could be in IRQ context), without - * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */ - - if (error == 0 && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_txseq, conn->ibc_rxseq); - } else { - CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? 
"" : "(waiting)", - conn->ibc_txseq, conn->ibc_rxseq); - } - - list_del (&conn->ibc_list); - - if (list_empty (&peer->ibp_conns)) { /* no more conns */ - if (peer->ibp_persistence == 0 && /* non-persistent peer */ - kibnal_peer_active(peer)) /* still in peer table */ - kibnal_unlink_peer_locked (peer); - - /* set/clear error on last conn */ - peer->ibp_error = conn->ibc_comms_error; - } - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECT1); - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); /* lose ibc_list's ref */ -} - -void -kibnal_close_conn (kib_conn_t *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - kibnal_close_conn_locked (conn, error); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_handle_early_rxs(kib_conn_t *conn) -{ - unsigned long flags; - kib_rx_t *rx; - - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - while (!list_empty(&conn->ibc_early_rxs)) { - rx = list_entry(conn->ibc_early_rxs.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_handle_rx(rx); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_abort_txs(kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_queued = 0; - tx->tx_waiting = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, 
&zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kibnal_txlist_done(&zombies, -ECONNABORTED); -} - -void -kibnal_conn_disconnected(kib_conn_t *conn) -{ - /* I'm the connd */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT); - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTED); - - /* move QP to error state to make posted work items complete */ - kibnal_set_qp_state(conn, vv_qp_state_error); - - /* Complete all tx descs not waiting for sends to complete. - * NB we should be safe from RDMA now that the QP has changed state */ - - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kibnal_abort_txs(conn, &conn->ibc_active_txs); - - kibnal_handle_early_rxs(conn); - - kibnal_peer_notify(conn->ibc_peer); -} - -void -kibnal_peer_connect_failed (kib_peer_t *peer, int active, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - /* Only the connd creates conns => single threaded */ - LASSERT (error != 0); - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (active) { - LASSERT (peer->ibp_connecting != 0); - peer->ibp_connecting--; - } else { - LASSERT (peer->ibp_accepting != 0); - peer->ibp_accepting--; - } - - if (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0) { - /* another connection attempt under way (loopback?)... 
*/ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return; - } - - if (list_empty(&peer->ibp_conns)) { - /* Say when active connection can be re-attempted */ - peer->ibp_reconnect_interval *= 2; - peer->ibp_reconnect_interval = - MAX(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_min_reconnect_interval); - peer->ibp_reconnect_interval = - MIN(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_max_reconnect_interval); - - peer->ibp_reconnect_time = jiffies + - peer->ibp_reconnect_interval * HZ; - - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kibnal_peer_active(peer) && - (peer->ibp_persistence == 0)) { - /* failed connection attempt on non-persistent peer */ - kibnal_unlink_peer_locked (peer); - } - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT (list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_peer_notify(peer); - - if (list_empty (&zombies)) - return; - - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_txlist_done(&zombies, -EHOSTUNREACH); -} - -void -kibnal_reject(cm_cep_handle_t cep, int why) -{ - static cm_reject_data_t rejs[3]; - cm_reject_data_t *rej = &rejs[why]; - - LASSERT (why >= 0 && why < sizeof(rejs)/sizeof(rejs[0])); - - /* If I wasn't so lazy, I'd initialise this only once; it's effective - * read-only */ - rej->reason = cm_rej_code_usr_rej; - rej->priv_data[0] = (IBNAL_MSG_MAGIC) & 0xff; - rej->priv_data[1] = (IBNAL_MSG_MAGIC >> 8) & 0xff; - rej->priv_data[2] = (IBNAL_MSG_MAGIC >> 16) & 0xff; - rej->priv_data[3] = (IBNAL_MSG_MAGIC >> 24) & 0xff; - rej->priv_data[4] = (IBNAL_MSG_VERSION) & 0xff; - rej->priv_data[5] = (IBNAL_MSG_VERSION >> 8) & 0xff; - rej->priv_data[6] = why; - - cm_reject(cep, rej); -} - -void 
-kibnal_connreq_done(kib_conn_t *conn, int active, int status) -{ - struct list_head txs; - kib_peer_t *peer = conn->ibc_peer; - unsigned long flags; - kib_tx_t *tx; - - CDEBUG(D_NET,"%d\n", status); - - /* Only the connd creates conns => single threaded */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED); - - if (active) { - LASSERT (peer->ibp_connecting > 0); - } else { - LASSERT (peer->ibp_accepting > 0); - } - - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - conn->ibc_connvars = NULL; - - if (status != 0) { - /* failed to establish connection */ - switch (conn->ibc_state) { - default: - LBUG(); - - case IBNAL_CONN_ACTIVE_CHECK_REPLY: - /* got a connection reply but failed checks */ - LASSERT (active); - kibnal_reject(conn->ibc_cep, IBNAL_REJECT_FATAL); - break; - - case IBNAL_CONN_ACTIVE_CONNECT: - LASSERT (active); - cm_cancel(conn->ibc_cep); - cfs_pause(cfs_time_seconds(1)/10); - /* cm_connect() failed immediately or - * callback returned failure */ - break; - - case IBNAL_CONN_ACTIVE_ARP: - LASSERT (active); - /* ibat_get_ib_data() failed immediately - * or callback returned failure */ - break; - - case IBNAL_CONN_INIT: - break; - - case IBNAL_CONN_PASSIVE_WAIT: - LASSERT (!active); - /* cm_accept callback returned failure */ - break; - } - - kibnal_peer_connect_failed(peer, active, status); - kibnal_conn_disconnected(conn); - return; - } - - /* connection established */ - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (active) { - LASSERT(conn->ibc_state == IBNAL_CONN_ACTIVE_RTU); - } else { - LASSERT(conn->ibc_state == IBNAL_CONN_PASSIVE_WAIT); - } - - conn->ibc_last_send = jiffies; - kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED); - kibnal_peer_alive(peer); - - /* Add conn to peer's list and nuke any dangling conns from a different - * peer instance... 
*/ - kibnal_conn_addref(conn); /* +1 ref for ibc_list */ - list_add(&conn->ibc_list, &peer->ibp_conns); - kibnal_close_stale_conns_locked (peer, conn->ibc_incarnation); - - if (!kibnal_peer_active(peer) || /* peer has been deleted */ - conn->ibc_comms_error != 0 || /* comms error */ - conn->ibc_disconnect) { /* need to disconnect */ - - /* start to shut down connection */ - kibnal_close_conn_locked(conn, -ECONNABORTED); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - kibnal_peer_connect_failed(peer, active, -ECONNABORTED); - return; - } - - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - /* grab pending txs while I have the lock */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */ - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* Schedule blocked txs */ - spin_lock (&conn->ibc_lock); - while (!list_empty (&txs)) { - tx = list_entry (txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - kibnal_queue_tx_locked (tx, conn); - } - spin_unlock (&conn->ibc_lock); - kibnal_check_sends (conn); - - /* schedule blocked rxs */ - kibnal_handle_early_rxs(conn); -} - -void -kibnal_cm_callback(cm_cep_handle_t cep, cm_conn_data_t *cmdata, void *arg) -{ - static cm_dreply_data_t drep; /* just zeroed space */ - - kib_conn_t *conn = (kib_conn_t *)arg; - unsigned long flags; - - /* CAVEAT EMPTOR: tasklet context */ - - switch (cmdata->status) { - default: - LBUG(); - - case cm_event_disconn_request: - /* IBNAL_CONN_ACTIVE_RTU: gets closed in kibnal_connreq_done - * IBNAL_CONN_ESTABLISHED: I start it closing - * otherwise: it's closing anyway */ - cm_disconnect(conn->ibc_cep, NULL, &drep); - cm_cancel(conn->ibc_cep); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - LASSERT (!conn->ibc_disconnect); - conn->ibc_disconnect = 1; - - switch (conn->ibc_state) { - default: - LBUG(); - - case 
IBNAL_CONN_ACTIVE_RTU: - /* kibnal_connreq_done is getting there; It'll see - * ibc_disconnect set... */ - break; - - case IBNAL_CONN_ESTABLISHED: - /* kibnal_connreq_done got there already; get - * disconnect going... */ - kibnal_close_conn_locked(conn, 0); - break; - - case IBNAL_CONN_DISCONNECT1: - /* kibnal_disconnect_conn is getting there; It'll see - * ibc_disconnect set... */ - break; - - case IBNAL_CONN_DISCONNECT2: - /* kibnal_disconnect_conn got there already; complete - * the disconnect. */ - kibnal_schedule_conn(conn); - break; - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - break; - - case cm_event_disconn_timeout: - case cm_event_disconn_reply: - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT2); - LASSERT (!conn->ibc_disconnect); - conn->ibc_disconnect = 1; - - /* kibnal_disconnect_conn sent the disconnect request. */ - kibnal_schedule_conn(conn); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - break; - - case cm_event_connected: - case cm_event_conn_timeout: - case cm_event_conn_reject: - LASSERT (conn->ibc_state == IBNAL_CONN_PASSIVE_WAIT); - conn->ibc_connvars->cv_conndata = *cmdata; - - kibnal_schedule_conn(conn); - break; - } - - kibnal_conn_decref(conn); /* lose my ref */ -} - -void -kibnal_check_passive_wait(kib_conn_t *conn) -{ - int rc; - - switch (conn->ibc_connvars->cv_conndata.status) { - default: - LBUG(); - - case cm_event_connected: - kibnal_conn_addref(conn); /* ++ ref for CM callback */ - rc = kibnal_set_qp_state(conn, vv_qp_state_rts); - if (rc != 0) - conn->ibc_comms_error = rc; - /* connection _has_ been established; it's just that we've had - * an error immediately... 
*/ - kibnal_connreq_done(conn, 0, 0); - break; - - case cm_event_conn_timeout: - kibnal_connreq_done(conn, 0, -ETIMEDOUT); - break; - - case cm_event_conn_reject: - kibnal_connreq_done(conn, 0, -ECONNRESET); - break; - } -} - -void -kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq) -{ - static kib_msg_t txmsg; - static kib_msg_t rxmsg; - static cm_reply_data_t reply; - - kib_conn_t *conn = NULL; - int rc = 0; - int reason; - int rxmsgnob; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - kib_peer_t *peer; - kib_peer_t *peer2; - unsigned long flags; - kib_connvars_t *cv; - cm_return_t cmrc; - vv_return_t vvrc; - - /* I'm the connd executing in thread context - * No concurrency problems with static data! */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - if (cmreq->sid != (__u64)(*kibnal_tunables.kib_service_number)) { - CERROR(LPX64" != IBNAL_SERVICE_NUMBER("LPX64")\n", - cmreq->sid, (__u64)(*kibnal_tunables.kib_service_number)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - /* copy into rxmsg to avoid alignment issues */ - rxmsgnob = MIN(cm_REQ_priv_data_len, sizeof(rxmsg)); - memcpy(&rxmsg, cmreq->priv_data, rxmsgnob); - - rc = kibnal_unpack_msg(&rxmsg, 0, rxmsgnob); - if (rc != 0) { - /* SILENT! 
kibnal_unpack_msg() complains if required */ - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_version != IBNAL_MSG_VERSION) - CWARN("Connection from %s: old protocol version 0x%x\n", - libcfs_nid2str(rxmsg.ibm_srcnid), rxmsg.ibm_version); - - if (rxmsg.ibm_type != IBNAL_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - rxmsg.ibm_type, libcfs_nid2str(rxmsg.ibm_srcnid)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - rxmsg.ibm_dstnid)) { - CERROR("Can't accept %s: bad dst nid %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - libcfs_nid2str(rxmsg.ibm_dstnid)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - rxmsg.ibm_u.connparams.ibcp_queue_depth, - IBNAL_MSG_QUEUE_SIZE); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - rxmsg.ibm_u.connparams.ibcp_max_msg_size, - IBNAL_MSG_SIZE); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Can't accept %s: max frags %d too big (%d max)\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - rxmsg.ibm_u.connparams.ibcp_max_frags, - IBNAL_MAX_RDMA_FRAGS); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - /* assume 'rxmsg.ibm_srcnid' is a new peer; create */ - rc = kibnal_create_peer (&peer, rxmsg.ibm_srcnid); - if (rc != 0) { - CERROR("Can't create peer for %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - reason = IBNAL_REJECT_NO_RESOURCES; - goto reject; - } - - write_lock_irqsave(g_lock, flags); - - if (kibnal_data.kib_listen_handle == NULL) { - write_unlock_irqrestore(g_lock, flags); - - CWARN ("Shutdown has 
started, rejecting connreq from %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - kibnal_peer_decref(peer); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - peer2 = kibnal_find_peer_locked(rxmsg.ibm_srcnid); - if (peer2 != NULL) { - /* tie-break connection race in favour of the higher NID */ - if (peer2->ibp_connecting != 0 && - rxmsg.ibm_srcnid < kibnal_data.kib_ni->ni_nid) { - write_unlock_irqrestore(g_lock, flags); - - CWARN("Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kibnal_peer_decref(peer); - reason = IBNAL_REJECT_CONN_RACE; - goto reject; - } - - peer2->ibp_accepting++; - kibnal_peer_addref(peer2); - - write_unlock_irqrestore(g_lock, flags); - kibnal_peer_decref(peer); - peer = peer2; - } else { - /* Brand new peer */ - LASSERT (peer->ibp_accepting == 0); - peer->ibp_accepting = 1; - - kibnal_peer_addref(peer); - list_add_tail(&peer->ibp_list, kibnal_nid2peerlist(rxmsg.ibm_srcnid)); - - write_unlock_irqrestore(g_lock, flags); - } - - conn = kibnal_create_conn(cep); - if (conn == NULL) { - CERROR("Can't create conn for %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - kibnal_peer_connect_failed(peer, 0, -ENOMEM); - kibnal_peer_decref(peer); - reason = IBNAL_REJECT_NO_RESOURCES; - goto reject; - } - - conn->ibc_version = rxmsg.ibm_version; - - conn->ibc_peer = peer; /* conn takes over my ref */ - conn->ibc_incarnation = rxmsg.ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - cv = conn->ibc_connvars; - - cv->cv_txpsn = cmreq->cep_data.start_psn; - cv->cv_remote_qpn = cmreq->cep_data.qpn; - cv->cv_path = cmreq->path_data.path; - cv->cv_rnr_count = cmreq->cep_data.rtr_retry_cnt; - // XXX cmreq->cep_data.retry_cnt; - cv->cv_port = cmreq->cep_data.local_port_num; - - vvrc = gid2gid_index(kibnal_data.kib_hca, cv->cv_port, - &cv->cv_path.sgid, &cv->cv_sgid_index); - if (vvrc != vv_return_ok) { - 
CERROR("gid2gid_index failed for %s: %d\n", - libcfs_nid2str(rxmsg.ibm_srcnid), vvrc); - rc = -EIO; - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - vvrc = pkey2pkey_index(kibnal_data.kib_hca, cv->cv_port, - cv->cv_path.pkey, &cv->cv_pkey_index); - if (vvrc != vv_return_ok) { - CERROR("pkey2pkey_index failed for %s: %d\n", - libcfs_nid2str(rxmsg.ibm_srcnid), vvrc); - rc = -EIO; - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_init); - if (rc != 0) { - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - rc = kibnal_post_receives(conn); - if (rc != 0) { - CERROR("Can't post receives for %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_rtr); - if (rc != 0) { - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - memset(&reply, 0, sizeof(reply)); - reply.qpn = cv->cv_local_qpn; - reply.qkey = IBNAL_QKEY; - reply.start_psn = cv->cv_rxpsn; - reply.arb_initiator_depth = IBNAL_ARB_INITIATOR_DEPTH; - reply.arb_resp_res = IBNAL_ARB_RESP_RES; - reply.failover_accepted = IBNAL_FAILOVER_ACCEPTED; - reply.rnr_retry_count = cv->cv_rnr_count; - reply.targ_ack_delay = kibnal_data.kib_hca_attrs.ack_delay; - - /* setup txmsg... 
*/ - memset(&txmsg, 0, sizeof(txmsg)); - kibnal_init_msg(&txmsg, IBNAL_MSG_CONNACK, - sizeof(txmsg.ibm_u.connparams)); - LASSERT (txmsg.ibm_nob <= cm_REP_priv_data_len); - txmsg.ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - txmsg.ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE; - txmsg.ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS; - kibnal_pack_msg(&txmsg, conn->ibc_version, - 0, rxmsg.ibm_srcnid, rxmsg.ibm_srcstamp, 0); - - /* ...and copy into reply to avoid alignment issues */ - memcpy(&reply.priv_data, &txmsg, txmsg.ibm_nob); - - kibnal_set_conn_state(conn, IBNAL_CONN_PASSIVE_WAIT); - - cmrc = cm_accept(conn->ibc_cep, &reply, NULL, - kibnal_cm_callback, conn); - - if (cmrc == cm_stat_success) - return; /* callback has got my ref on conn */ - - /* back out state change (no callback happening) */ - kibnal_set_conn_state(conn, IBNAL_CONN_INIT); - rc = -EIO; - reason = IBNAL_REJECT_FATAL; - - reject: - CDEBUG(D_NET, "Rejecting connreq from %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - - kibnal_reject(cep, reason); - - if (conn != NULL) { - LASSERT (rc != 0); - kibnal_connreq_done(conn, 0, rc); - } else { - cm_destroy_cep(cep); - } -} - -void -kibnal_listen_callback(cm_cep_handle_t cep, cm_conn_data_t *data, void *arg) -{ - cm_request_data_t *cmreq = &data->data.request; - kib_pcreq_t *pcr; - unsigned long flags; - - LASSERT (arg == NULL); - - if (data->status != cm_event_conn_request) { - CERROR("status %d is not cm_event_conn_request\n", - data->status); - return; - } - - LIBCFS_ALLOC_ATOMIC(pcr, sizeof(*pcr)); - if (pcr == NULL) { - CERROR("Can't allocate passive connreq\n"); - - kibnal_reject(cep, IBNAL_REJECT_NO_RESOURCES); - cm_destroy_cep(cep); - return; - } - - pcr->pcr_cep = cep; - pcr->pcr_cmreq = *cmreq; - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail(&pcr->pcr_list, &kibnal_data.kib_connd_pcreqs); - wake_up(&kibnal_data.kib_connd_waitq); -spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); 
-} - - -void -kibnal_active_connect_callback (cm_cep_handle_t cep, cm_conn_data_t *cd, - void *arg) -{ - /* CAVEAT EMPTOR: tasklet context */ - kib_conn_t *conn = (kib_conn_t *)arg; - kib_connvars_t *cv = conn->ibc_connvars; - - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT); - cv->cv_conndata = *cd; - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); -} - -void -kibnal_connect_conn (kib_conn_t *conn) -{ - static cm_request_data_t cmreq; - static kib_msg_t msg; - - kib_connvars_t *cv = conn->ibc_connvars; - kib_peer_t *peer = conn->ibc_peer; - cm_return_t cmrc; - - /* Only called by connd => statics OK */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP); - - memset(&cmreq, 0, sizeof(cmreq)); - - cmreq.sid = (__u64)(*kibnal_tunables.kib_service_number); - - cmreq.cep_data.ca_guid = kibnal_data.kib_hca_attrs.guid; - cmreq.cep_data.qpn = cv->cv_local_qpn; - cmreq.cep_data.retry_cnt = *kibnal_tunables.kib_retry_cnt; - cmreq.cep_data.rtr_retry_cnt = *kibnal_tunables.kib_rnr_cnt; - cmreq.cep_data.start_psn = cv->cv_rxpsn; - cmreq.cep_data.end_to_end_flow_ctrl = IBNAL_EE_FLOW_CNT; - // XXX ack_timeout? - // offered_resp_res - // offered_initiator_depth - - cmreq.path_data.subn_local = IBNAL_LOCAL_SUB; - cmreq.path_data.path = cv->cv_path; - - /* setup msg... 
*/ - memset(&msg, 0, sizeof(msg)); - kibnal_init_msg(&msg, IBNAL_MSG_CONNREQ, sizeof(msg.ibm_u.connparams)); - LASSERT(msg.ibm_nob <= cm_REQ_priv_data_len); - msg.ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - msg.ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE; - msg.ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS; - kibnal_pack_msg(&msg, conn->ibc_version, 0, peer->ibp_nid, 0, 0); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - msg.ibm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - msg.ibm_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - /* ...and copy into cmreq to avoid alignment issues */ - memcpy(&cmreq.priv_data, &msg, msg.ibm_nob); - - CDEBUG(D_NET, "Connecting %p to %s\n", conn, - libcfs_nid2str(peer->ibp_nid)); - - kibnal_conn_addref(conn); /* ++ref for CM callback */ - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CONNECT); - - cmrc = cm_connect(conn->ibc_cep, &cmreq, - kibnal_active_connect_callback, conn); - if (cmrc == cm_stat_success) { - CDEBUG(D_NET, "connection REQ sent to %s\n", - libcfs_nid2str(peer->ibp_nid)); - return; - } - - CERROR ("Connect %s failed: %d\n", libcfs_nid2str(peer->ibp_nid), cmrc); - kibnal_conn_decref(conn); /* drop callback's ref */ - kibnal_connreq_done(conn, 1, -EHOSTUNREACH); -} - -void -kibnal_reconnect (kib_conn_t *conn, int why) -{ - kib_peer_t *peer = conn->ibc_peer; - int retry; - unsigned long flags; - cm_return_t cmrc; - cm_cep_handle_t cep; - - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT); - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */ - - /* retry connection if it's still needed and no other connection - * attempts (active or passive) are in progress. 
- * Immediate reconnect is required, so I don't even look at the - * reconnection timeout etc */ - - retry = (!list_empty(&peer->ibp_tx_queue) && - peer->ibp_connecting == 1 && - peer->ibp_accepting == 0); - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (!retry) { - kibnal_connreq_done(conn, 1, why); - return; - } - - cep = cm_create_cep(cm_cep_transp_rc); - if (cep == NULL) { - CERROR("Can't create new CEP\n"); - kibnal_connreq_done(conn, 1, -ENOMEM); - return; - } - - cmrc = cm_cancel(conn->ibc_cep); - LASSERT (cmrc == cm_stat_success); - cmrc = cm_destroy_cep(conn->ibc_cep); - LASSERT (cmrc == cm_stat_success); - - conn->ibc_cep = cep; - - /* reuse conn; no need to peer->ibp_connecting++ */ - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_ARP); - kibnal_connect_conn(conn); -} - -void -kibnal_check_connreply (kib_conn_t *conn) -{ - static cm_rtu_data_t rtu; - static kib_msg_t msg; - - kib_connvars_t *cv = conn->ibc_connvars; - cm_reply_data_t *reply = &cv->cv_conndata.data.reply; - kib_peer_t *peer = conn->ibc_peer; - int msgnob; - cm_return_t cmrc; - unsigned long flags; - int rc; - - /* Only called by connd => statics OK */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT); - - if (cv->cv_conndata.status == cm_event_conn_reply) { - cv->cv_remote_qpn = reply->qpn; - cv->cv_txpsn = reply->start_psn; - // XXX reply->targ_ack_delay; - cv->cv_rnr_count = reply->rnr_retry_count; - - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CHECK_REPLY); - - /* copy into msg to avoid alignment issues */ - msgnob = MIN(cm_REP_priv_data_len, sizeof(msg)); - memcpy(&msg, &reply->priv_data, msgnob); - - rc = kibnal_unpack_msg(&msg, conn->ibc_version, msgnob); - if (rc != 0) { - CERROR("Can't unpack reply from %s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - return; - } - - if (msg.ibm_type != IBNAL_MSG_CONNACK ) { - CERROR("Unexpected message type %d 
from %s\n", - msg.ibm_type, libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - if (msg.ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR("%s has incompatible queue depth %d(%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg.ibm_u.connparams.ibcp_queue_depth, - IBNAL_MSG_QUEUE_SIZE); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - if (msg.ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE) { - CERROR("%s max message size %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg.ibm_u.connparams.ibcp_max_msg_size, - IBNAL_MSG_SIZE); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - if (msg.ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("%s max frags %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg.ibm_u.connparams.ibcp_max_frags, - IBNAL_MAX_RDMA_FRAGS); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - if (lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg.ibm_dstnid) && - msg.ibm_dststamp == kibnal_data.kib_incarnation) - rc = 0; - else - rc = -ESTALE; - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - if (rc != 0) { - CERROR("Stale connection reply from %s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - return; - } - - conn->ibc_incarnation = msg.ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - rc = kibnal_post_receives(conn); - if (rc != 0) { - CERROR("Can't post receives for %s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - return; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_rtr); - if (rc != 0) { - kibnal_connreq_done(conn, 1, rc); - return; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_rts); - if (rc != 0) { - kibnal_connreq_done(conn, 1, rc); 
- return; - } - - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_RTU); - kibnal_conn_addref(conn); /* ++for CM callback */ - - memset(&rtu, 0, sizeof(rtu)); - cmrc = cm_accept(conn->ibc_cep, NULL, &rtu, - kibnal_cm_callback, conn); - if (cmrc == cm_stat_success) { - /* Now I'm racing with disconnect signalled by - * kibnal_cm_callback */ - kibnal_connreq_done(conn, 1, 0); - return; - } - - CERROR("cm_accept %s failed: %d\n", - libcfs_nid2str(peer->ibp_nid), cmrc); - /* Back out of RTU: no callback coming */ - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CHECK_REPLY); - kibnal_conn_decref(conn); - kibnal_connreq_done(conn, 1, -EIO); - return; - } - - if (cv->cv_conndata.status == cm_event_conn_reject) { - - if (cv->cv_conndata.data.reject.reason == cm_rej_code_usr_rej) { - unsigned char *bytes = - cv->cv_conndata.data.reject.priv_data; - int magic = (bytes[0]) | - (bytes[1] << 8) | - (bytes[2] << 16) | - (bytes[3] << 24); - int version = (bytes[4]) | - (bytes[5] << 8); - int why = (bytes[6]); - - /* Expected proto/version: she just doesn't like me (or - * ran out of resources) */ - if (magic == IBNAL_MSG_MAGIC && - version == conn->ibc_version) { - CERROR("conn -> %s rejected: fatal error %d\n", - libcfs_nid2str(peer->ibp_nid), why); - - if (why == IBNAL_REJECT_CONN_RACE) - kibnal_reconnect(conn, -EALREADY); - else - kibnal_connreq_done(conn, 1, -ECONNREFUSED); - return; - } - - /* Fail unless it's worth retrying with an old proto - * version */ - if (!(magic == IBNAL_MSG_MAGIC && - version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - conn->ibc_version == IBNAL_MSG_VERSION)) { - CERROR("conn -> %s rejected: bad protocol " - "magic/ver %08x/%x why %d\n", - libcfs_nid2str(peer->ibp_nid), - magic, version, why); - - kibnal_connreq_done(conn, 1, -ECONNREFUSED); - return; - } - - conn->ibc_version = version; - CWARN ("Connection to %s refused: " - "retrying with old protocol version 0x%x\n", - libcfs_nid2str(peer->ibp_nid), version); - - kibnal_reconnect(conn, 
-ECONNREFUSED); - return; - } else if (cv->cv_conndata.data.reject.reason == - cm_rej_code_stale_conn) { - - CWARN ("conn -> %s stale: retrying\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_reconnect(conn, -ESTALE); - return; - } else { - CDEBUG(D_NETERROR, "conn -> %s rejected: reason %d\n", - libcfs_nid2str(peer->ibp_nid), - cv->cv_conndata.data.reject.reason); - kibnal_connreq_done(conn, 1, -ECONNREFUSED); - return; - } - /* NOT REACHED */ - } - - CDEBUG(D_NETERROR, "conn -> %s failed: %d\n", - libcfs_nid2str(peer->ibp_nid), cv->cv_conndata.status); - kibnal_connreq_done(conn, 1, -ECONNABORTED); -} - -void -kibnal_arp_done (kib_conn_t *conn) -{ - kib_peer_t *peer = conn->ibc_peer; - kib_connvars_t *cv = conn->ibc_connvars; - ibat_arp_data_t *arp = &cv->cv_arp; - ib_path_record_v2_t *path = &cv->cv_path; - vv_return_t vvrc; - int rc; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP); - LASSERT (peer->ibp_arp_count > 0); - - if (cv->cv_arprc != ibat_stat_ok) { - CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed: %d\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - cv->cv_arprc); - goto failed; - } - - if ((arp->mask & IBAT_PRI_PATH_VALID) != 0) { - CDEBUG(D_NET, "Got valid path for %s\n", - libcfs_nid2str(peer->ibp_nid)); - - *path = *arp->primary_path; - - vvrc = base_gid2port_num(kibnal_data.kib_hca, &path->sgid, - &cv->cv_port); - if (vvrc != vv_return_ok) { - CWARN("base_gid2port_num failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - vvrc = gid2gid_index(kibnal_data.kib_hca, cv->cv_port, - &path->sgid, &cv->cv_sgid_index); - if (vvrc != vv_return_ok) { - CWARN("gid2gid_index failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - vvrc = pkey2pkey_index(kibnal_data.kib_hca, cv->cv_port, - path->pkey, 
&cv->cv_pkey_index); - if (vvrc != vv_return_ok) { - CWARN("pkey2pkey_index failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - path->mtu = IBNAL_IB_MTU; - - } else if ((arp->mask & IBAT_LID_VALID) != 0) { - CWARN("Creating new path record for %s @ %u.%u.%u.%u\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip)); - - cv->cv_pkey_index = IBNAL_PKEY_IDX; - cv->cv_sgid_index = IBNAL_SGID_IDX; - cv->cv_port = arp->local_port_num; - - memset(path, 0, sizeof(*path)); - - vvrc = port_num2base_gid(kibnal_data.kib_hca, cv->cv_port, - &path->sgid); - if (vvrc != vv_return_ok) { - CWARN("port_num2base_gid failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_ip), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - vvrc = port_num2base_lid(kibnal_data.kib_hca, cv->cv_port, - &path->slid); - if (vvrc != vv_return_ok) { - CWARN("port_num2base_lid failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_ip), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - path->dgid = arp->gid; - path->sl = IBNAL_SERVICE_LEVEL; - path->dlid = arp->lid; - path->mtu = IBNAL_IB_MTU; - path->rate = IBNAL_STATIC_RATE; - path->pkt_life_time = IBNAL_PKT_LIFETIME; - path->pkey = IBNAL_PKEY; - path->traffic_class = IBNAL_TRAFFIC_CLASS; - } else { - CWARN("Arp for %s @ %u.%u.%u.%u returned neither PATH nor LID\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip)); - goto failed; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_init); - if (rc != 0) { - kibnal_connreq_done(conn, 1, rc); - } - - /* do the actual connection request */ - kibnal_connect_conn(conn); - return; - - failed: - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - peer->ibp_arp_count--; - if (peer->ibp_arp_count == 0) { - /* final ARP attempt failed */ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed (final attempt)\n", - libcfs_nid2str(peer->ibp_nid), 
HIPQUAD(peer->ibp_ip)); - } else { - /* Retry ARP: ibp_connecting++ so terminating conn - * doesn't end peer's connection attempt */ - peer->ibp_connecting++; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed (%d attempts left)\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - peer->ibp_arp_count); - - kibnal_schedule_peer_arp(peer); - } - kibnal_connreq_done(conn, 1, -ENETUNREACH); -} - -void -kibnal_arp_callback (ibat_stat_t arprc, ibat_arp_data_t *arp_data, void *arg) -{ - /* CAVEAT EMPTOR: tasklet context */ - kib_peer_t *peer; - kib_conn_t *conn = (kib_conn_t *)arg; - - LASSERT (conn != NULL); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP); - - peer = conn->ibc_peer; - - if (arprc != ibat_stat_ok) - CDEBUG(D_NETERROR, "Arp %s at %u.%u.%u.%u failed: %d\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), arprc); - else - CDEBUG(D_NET, "Arp %s at %u.%u.%u.%u OK: LID %s PATH %s\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - (arp_data->mask & IBAT_LID_VALID) == 0 ? "invalid" : "valid", - (arp_data->mask & IBAT_PRI_PATH_VALID) == 0 ? "invalid" : "valid"); - - conn->ibc_connvars->cv_arprc = arprc; - if (arprc == ibat_stat_ok) - conn->ibc_connvars->cv_arp = *arp_data; - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); -} - -void -kibnal_arp_peer (kib_peer_t *peer) -{ - cm_cep_handle_t cep; - kib_conn_t *conn; - int ibatrc; - - /* Only the connd does this (i.e. 
single threaded) */ - LASSERT (current == kibnal_data.kib_connd); - LASSERT (peer->ibp_connecting != 0); - LASSERT (peer->ibp_arp_count > 0); - - cep = cm_create_cep(cm_cep_transp_rc); - if (cep == NULL) { - CERROR ("Can't create cep for conn->%s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_peer_connect_failed(peer, 1, -ENOMEM); - return; - } - - conn = kibnal_create_conn(cep); - if (conn == NULL) { - CERROR ("Can't allocate conn->%s\n", - libcfs_nid2str(peer->ibp_nid)); - cm_destroy_cep(cep); - kibnal_peer_connect_failed(peer, 1, -ENOMEM); - return; - } - - conn->ibc_peer = peer; - kibnal_peer_addref(peer); - - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_ARP); - - ibatrc = ibat_get_ib_data(htonl(peer->ibp_ip), INADDR_ANY, - ibat_paths_primary, - &conn->ibc_connvars->cv_arp, - kibnal_arp_callback, conn, 0); - CDEBUG(D_NET,"ibatrc %d\n", ibatrc); - switch (ibatrc) { - default: - LBUG(); - - case ibat_stat_pending: - /* NB callback has my ref on conn */ - break; - - case ibat_stat_ok: - case ibat_stat_error: - case ibat_stat_timeout: - case ibat_stat_not_found: - /* Immediate return (ARP cache hit or failure) == no callback. - * Do the next stage directly... 
*/ - conn->ibc_connvars->cv_arprc = ibatrc; - kibnal_arp_done(conn); - kibnal_conn_decref(conn); - break; - } -} - -int -kibnal_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - int timed_out = 0; - - spin_lock(&conn->ibc_lock); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - break; - } - } - - spin_unlock(&conn->ibc_lock); - return timed_out; -} - -int -kibnal_conn_timed_out (kib_conn_t *conn) -{ - return kibnal_check_txs(conn, &conn->ibc_tx_queue) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kibnal_check_txs(conn, &conn->ibc_active_txs); -} - -void -kibnal_check_conns (int idx) -{ - struct list_head *peers = &kibnal_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... */ - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - kibnal_check_sends(conn); - - if (!kibnal_conn_timed_out(conn)) - continue; - - /* Handle timeout by closing the whole connection. We - * can only be sure RDMA activity has ceased once the - * QP has been modified. 
*/ - - kibnal_conn_addref(conn); /* 1 ref for me... */ - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_close_conn (conn, -ETIMEDOUT); - kibnal_conn_decref(conn); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_disconnect_conn (kib_conn_t *conn) -{ - static cm_drequest_data_t dreq; /* just for the space */ - - cm_return_t cmrc; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (conn->ibc_disconnect) { - /* Had the CM callback already */ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - kibnal_conn_disconnected(conn); - return; - } - - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT1); - - /* active disconnect */ - cmrc = cm_disconnect(conn->ibc_cep, &dreq, NULL); - if (cmrc == cm_stat_success) { - /* waiting for CM */ - conn->ibc_state = IBNAL_CONN_DISCONNECT2; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return; - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - cm_cancel(conn->ibc_cep); - cfs_pause(cfs_time_seconds(1)/10); - - if (!conn->ibc_disconnect) /* CM callback will never happen now */ - kibnal_conn_decref(conn); - - LASSERT (atomic_read(&conn->ibc_refcount) > 0); - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT1); - - kibnal_conn_disconnected(conn); -} - -int -kibnal_connd (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_pcreq_t *pcr; - kib_conn_t *conn; - kib_peer_t *peer; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kibnal_connd"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - kibnal_data.kib_connd = current; - - 
spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - while (!kibnal_data.kib_shutdown) { - - dropped_lock = 0; - - if (!list_empty (&kibnal_data.kib_connd_zombies)) { - conn = list_entry (kibnal_data.kib_connd_zombies.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - kibnal_destroy_conn(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_pcreqs)) { - pcr = list_entry(kibnal_data.kib_connd_pcreqs.next, - kib_pcreq_t, pcr_list); - list_del(&pcr->pcr_list); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - kibnal_recv_connreq(pcr->pcr_cep, &pcr->pcr_cmreq); - LIBCFS_FREE(pcr, sizeof(*pcr)); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_peers)) { - peer = list_entry (kibnal_data.kib_connd_peers.next, - kib_peer_t, ibp_connd_list); - - list_del_init (&peer->ibp_connd_list); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - kibnal_arp_peer (peer); - kibnal_peer_decref (peer); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_conns)) { - conn = list_entry (kibnal_data.kib_connd_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - switch (conn->ibc_state) { - default: - LBUG(); - - case IBNAL_CONN_ACTIVE_ARP: - kibnal_arp_done(conn); - break; - - case IBNAL_CONN_ACTIVE_CONNECT: - kibnal_check_connreply(conn); - break; - - case IBNAL_CONN_PASSIVE_WAIT: - kibnal_check_passive_wait(conn); - break; - - case IBNAL_CONN_DISCONNECT1: - case IBNAL_CONN_DISCONNECT2: - kibnal_disconnect_conn(conn); - break; - } - kibnal_conn_decref(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - /* careful with the 
jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kibnal_data.kib_peer_hash_size; - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. */ - - if (*kibnal_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kibnal_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kibnal_check_conns (peer_index); - peer_index = (peer_index + 1) % - kibnal_data.kib_peer_hash_size; - } - - deadline += p * HZ; - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - } - - if (dropped_lock) - continue; - - /* Nothing to do for 'timeout' */ - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_thread_fini (); - return (0); -} - -void -kibnal_async_callback(vv_event_record_t ev) -{ - CERROR("type: %d, port: %d, data: "LPX64"\n", - ev.event_type, ev.port_num, ev.type.data); -} - -void -kibnal_cq_callback (unsigned long unused_context) -{ - unsigned long flags; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); -} - -int -kibnal_scheduler(void *arg) -{ - long id = (long)arg; - wait_queue_t wait; - char name[16]; - vv_wc_t 
wc; - vv_return_t vvrc; - vv_return_t vvrc2; - unsigned long flags; - kib_rx_t *rx; - __u64 rxseq = 0; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kibnal_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - - while (!kibnal_data.kib_shutdown) { - if (busy_loops++ >= IBNAL_RESCHED) { - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - if (kibnal_data.kib_ready && - !kibnal_data.kib_checking_cq) { - /* take ownership of completion polling */ - kibnal_data.kib_checking_cq = 1; - /* Assume I'll exhaust the CQ */ - kibnal_data.kib_ready = 0; - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - vvrc = vv_poll_for_completion(kibnal_data.kib_hca, - kibnal_data.kib_cq, &wc); - if (vvrc == vv_return_err_cq_empty) { - vvrc2 = vv_request_completion_notification( - kibnal_data.kib_hca, - kibnal_data.kib_cq, - vv_next_solicit_unsolicit_event); - LASSERT (vvrc2 == vv_return_ok); - } - - if (vvrc == vv_return_ok && - kibnal_wreqid2type(wc.wr_id) == IBNAL_WID_RX) { - rx = (kib_rx_t *)kibnal_wreqid2ptr(wc.wr_id); - - /* Grab the RX sequence number NOW before - * anyone else can get an RX completion */ - rxseq = rx->rx_conn->ibc_rxseq++; - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - /* give up ownership of completion polling */ - kibnal_data.kib_checking_cq = 0; - - if (vvrc == vv_return_err_cq_empty) - continue; - - LASSERT (vvrc == vv_return_ok); - /* Assume there's more: get another scheduler to check - * while I handle this completion... 
*/ - - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - switch (kibnal_wreqid2type(wc.wr_id)) { - case IBNAL_WID_RX: - kibnal_rx_complete( - (kib_rx_t *)kibnal_wreqid2ptr(wc.wr_id), - wc.completion_status, - wc.num_bytes_transfered, - rxseq); - break; - - case IBNAL_WID_TX: - kibnal_tx_complete( - (kib_tx_t *)kibnal_wreqid2ptr(wc.wr_id), - wc.completion_status); - break; - - case IBNAL_WID_RDMA: - /* We only get RDMA completion notification if - * it fails. So we just ignore them completely - * because... - * - * 1) If an RDMA fails, all subsequent work - * items, including the final SEND will fail - * too, so I'm still guaranteed to notice that - * this connection is hosed. - * - * 2) It's positively dangerous to look inside - * the tx descriptor obtained from an RDMA work - * item. As soon as I drop the kib_sched_lock, - * I give a scheduler on another CPU a chance - * to get the final SEND completion, so the tx - * descriptor can get freed as I inspect it. */ - CDEBUG(D_NETERROR, "RDMA failed: %d\n", - wc.completion_status); - break; - - default: - LBUG(); - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - continue; - } - - /* Nothing to do; sleep... 
*/ - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kibnal_data.kib_sched_waitq, &wait); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - schedule(); - - remove_wait_queue(&kibnal_data.kib_sched_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - - kibnal_thread_fini(); - return (0); -} diff --git a/lnet/klnds/viblnd/viblnd_modparams.c b/lnet/klnds/viblnd/viblnd_modparams.c deleted file mode 100644 index f31c8dc2e51cad3e27c6d1b9e8fc939a6ea01c82..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd_modparams.c +++ /dev/null @@ -1,352 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "viblnd.h" - -static int service_number = 0x11b9a2; -CFS_MODULE_PARM(service_number, "i", int, 0444, - "IB service number"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int concurrent_peers = 1152; -CFS_MODULE_PARM(concurrent_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int arp_retries = 3; -CFS_MODULE_PARM(arp_retries, "i", int, 0644, - "# of times to retry ARP"); - -static char *hca_basename = "InfiniHost"; -CFS_MODULE_PARM(hca_basename, "s", charp, 0444, - "HCA base name"); - -static char *ipif_basename = "ipoib"; -CFS_MODULE_PARM(ipif_basename, "s", charp, 0444, - "IPoIB interface base name"); - -static int local_ack_timeout = 0x12; -CFS_MODULE_PARM(local_ack_timeout, "i", int, 0644, - "ACK timeout for low-level 'sends'"); - -static int retry_cnt = 7; -CFS_MODULE_PARM(retry_cnt, "i", int, 0644, - "Retransmissions when no ACK received"); - -static int rnr_cnt = 6; -CFS_MODULE_PARM(rnr_cnt, "i", int, 0644, - "RNR retransmissions"); - -static int rnr_nak_timer = 0x10; -CFS_MODULE_PARM(rnr_nak_timer, "i", int, 0644, - "RNR retransmission interval"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in 
seconds before sending a keepalive"); - -static int concurrent_sends = IBNAL_RX_MSGS; -CFS_MODULE_PARM(concurrent_sends, "i", int, 0644, - "send work-queue sizing"); - -#if IBNAL_USE_FMR -static int fmr_remaps = 1000; -CFS_MODULE_PARM(fmr_remaps, "i", int, 0444, - "FMR mappings allowed before unmap"); -#endif - -kib_tunables_t kibnal_tunables = { - .kib_service_number = &service_number, - .kib_min_reconnect_interval = &min_reconnect_interval, - .kib_max_reconnect_interval = &max_reconnect_interval, - .kib_concurrent_peers = &concurrent_peers, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_arp_retries = &arp_retries, - .kib_hca_basename = &hca_basename, - .kib_ipif_basename = &ipif_basename, - .kib_local_ack_timeout = &local_ack_timeout, - .kib_retry_cnt = &retry_cnt, - .kib_rnr_cnt = &rnr_cnt, - .kib_rnr_nak_timer = &rnr_nak_timer, - .kib_keepalive = &keepalive, - .kib_concurrent_sends = &concurrent_sends, -#if IBNAL_USE_FMR - .kib_fmr_remaps = &fmr_remaps, -#endif -}; - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - -static char hca_basename_space[32]; -static char ipif_basename_space[32]; - -static cfs_sysctl_table_t kibnal_ctl_table[] = { - { - .ctl_name = 1, - .procname = "service_number", - .data = &service_number, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 2, - .procname = "min_reconnect_interval", - .data = &min_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 3, - .procname = "max_reconnect_interval", - .data = &max_reconnect_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 4, - .procname = "concurrent_peers", - .data = &concurrent_peers, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 5, - .procname = "cksum", - .data = &cksum, - 
.maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 6, - .procname = "timeout", - .data = &timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 7, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 8, - .procname = "credits", - .data = &credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 9, - .procname = "peer_credits", - .data = &peer_credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 10, - .procname = "arp_retries", - .data = &arp_retries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 11, - .procname = "hca_basename", - .data = hca_basename_space, - .maxlen = sizeof(hca_basename_space), - .mode = 0444, - .proc_handler = &proc_dostring - }, - { - .ctl_name = 12, - .procname = "ipif_basename", - .data = ipif_basename_space, - .maxlen = sizeof(ipif_basename_space), - .mode = 0444, - .proc_handler = &proc_dostring - }, - { - .ctl_name = 13, - .procname = "local_ack_timeout", - .data = &local_ack_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 14, - .procname = "retry_cnt", - .data = &retry_cnt, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 15, - .procname = "rnr_cnt", - .data = &rnr_cnt, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 16, - .procname = "rnr_nak_timer", - .data = &rnr_nak_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 17, - .procname = "keepalive", - .data = &keepalive, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = 18, - .procname = "concurrent_sends", - 
.data = &concurrent_sends, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#if IBNAL_USE_FMR - { - .ctl_name = 19, - .procname = "fmr_remaps", - .data = &fmr_remaps, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, -#endif - {0} -}; - -static cfs_sysctl_table_t kibnal_top_ctl_table[] = { - { - .ctl_name = 203, - .procname = "vibnal", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kibnal_ctl_table - }, - {0} -}; - -void -kibnal_initstrtunable(char *space, char *str, int size) -{ - strncpy(space, str, size); - space[size-1] = 0; -} - -int -kibnal_tunables_init () -{ - kibnal_initstrtunable(hca_basename_space, hca_basename, - sizeof(hca_basename_space)); - kibnal_initstrtunable(ipif_basename_space, ipif_basename, - sizeof(ipif_basename_space)); - - kibnal_tunables.kib_sysctl = - cfs_register_sysctl_table(kibnal_top_ctl_table, 0); - - if (kibnal_tunables.kib_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - if (*kibnal_tunables.kib_concurrent_sends > IBNAL_RX_MSGS) - *kibnal_tunables.kib_concurrent_sends = IBNAL_RX_MSGS; - if (*kibnal_tunables.kib_concurrent_sends < IBNAL_MSG_QUEUE_SIZE) - *kibnal_tunables.kib_concurrent_sends = IBNAL_MSG_QUEUE_SIZE; - - return 0; -} - -void -kibnal_tunables_fini () -{ - if (kibnal_tunables.kib_sysctl != NULL) - cfs_unregister_sysctl_table(kibnal_tunables.kib_sysctl); -} - -#else - -int -kibnal_tunables_init () -{ - return 0; -} - -void -kibnal_tunables_fini () -{ -} - -#endif diff --git a/lnet/klnds/viblnd/viblnd_wire.h b/lnet/klnds/viblnd/viblnd_wire.h deleted file mode 100644 index 26242c185290702246089058bf8b728f3c4e27f8..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd_wire.h +++ /dev/null @@ -1,121 +0,0 @@ -/************************************************************************ - * IB Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). 
- */ - -typedef struct kib_connparams -{ - __u32 ibcp_queue_depth; - __u32 ibcp_max_msg_size; - __u32 ibcp_max_frags; -} WIRE_ATTR kib_connparams_t; - -typedef struct -{ - lnet_hdr_t ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kib_immediate_msg_t; - -#ifndef IBNAL_USE_FMR -# error "IBNAL_USE_FMR must be defined 1 or 0 before including this file" -#endif - -#if IBNAL_USE_FMR -typedef struct -{ - __u64 rd_addr; /* IO VMA address */ - __u32 rd_nob; /* # of bytes */ - __u32 rd_key; /* remote key */ -} WIRE_ATTR kib_rdma_desc_t; -#else -/* YEUCH! the __u64 address is split into 2 __u32 fields to ensure proper - * packing. Otherwise we can't fit enough frags into an IBNAL message (<= - * smallest page size on any arch). */ -typedef struct -{ - __u32 rf_nob; /* # of bytes */ - __u32 rf_addr_lo; /* lo 4 bytes of vaddr */ - __u32 rf_addr_hi; /* hi 4 bytes of vaddr */ -} WIRE_ATTR kib_rdma_frag_t; - -typedef struct -{ - __u32 rd_key; /* local/remote key */ - __u32 rd_nfrag; /* # fragments */ - kib_rdma_frag_t rd_frags[0]; /* buffer frags */ -} WIRE_ATTR kib_rdma_desc_t; -#endif - -typedef struct -{ - lnet_hdr_t ibprm_hdr; /* portals header */ - __u64 ibprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kib_putreq_msg_t; - -typedef struct -{ - __u64 ibpam_src_cookie; /* reflected completion cookie */ - __u64 ibpam_dst_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */ -} WIRE_ATTR kib_putack_msg_t; - -typedef struct -{ - lnet_hdr_t ibgm_hdr; /* portals header */ - __u64 ibgm_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibgm_rd; /* rdma descriptor */ -} WIRE_ATTR kib_get_msg_t; - -typedef struct -{ - __u64 ibcm_cookie; /* opaque completion cookie */ - __s32 ibcm_status; /* < 0 failure: >= 0 length */ -} WIRE_ATTR kib_completion_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an openibnal message */ - __u16 ibm_version; 
/* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - __u64 ibm_seq; /* sequence number */ - - union { - kib_connparams_t connparams; - kib_immediate_msg_t immediate; - kib_putreq_msg_t putreq; - kib_putack_msg_t putack; - kib_get_msg_t get; - kib_completion_msg_t completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR kib_msg_t; - -#define IBNAL_MSG_MAGIC LNET_PROTO_VIB_MAGIC /* unique magic */ - -#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 0x10 /* previous version */ - -#define IBNAL_MSG_VERSION 0x11 /* current version */ - -#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */ -#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define IBNAL_MSG_IMMEDIATE 0xd1 /* immediate */ -#define IBNAL_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ -#define IBNAL_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ -#define IBNAL_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ -#define IBNAL_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ -#define IBNAL_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ -#define IBNAL_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ - -/* connection rejection reasons */ -#define IBNAL_REJECT_CONN_RACE 0 /* You lost connection race */ -#define IBNAL_REJECT_NO_RESOURCES 1 /* Out of memory/conns etc */ -#define IBNAL_REJECT_FATAL 2 /* Anything else */ diff --git a/lnet/klnds/viblnd/wirecheck.c b/lnet/klnds/viblnd/wirecheck.c deleted file mode 100644 index 5a0e060a4ee6102f8cd6e5b07fb5a7e0e5f5ab6b..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/wirecheck.c +++ /dev/null @@ -1,227 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * 
vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include <stdio.h> -#include <string.h> -#include <sys/types.h> -#include <sys/wait.h> - -#include <lnet/api-support.h> - -/* This ghastly hack to allows me to include lib-types.h It doesn't affect any - * assertions generated here (but fails-safe if it ever does) */ -typedef struct { - int counter; -} atomic_t; - -#include <lnet/lib-types.h> - -#define IBNAL_USE_FMR 1 -#include "viblnd_wire.h" - -#ifndef HAVE_STRNLEN -#define strnlen(s, i) strlen(s) -#endif - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#undef STRINGIFY -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" CLASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE((int)offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -system_string (char *cmdline, char *str, int len) -{ - int fds[2]; - int rc; - pid_t pid; - - rc = pipe (fds); - if (rc != 0) - abort (); - - pid = fork (); - if (pid == 0) { - /* child */ - int fd = fileno(stdout); - - rc = dup2(fds[1], fd); - if (rc != fd) - abort(); - - exit(system(cmdline)); - /* notreached */ - } else if ((int)pid < 0) { - abort(); - } else { - FILE *f = fdopen (fds[0], "r"); - - if (f == NULL) - abort(); - - close(fds[1]); - - if (fgets(str, len, f) == NULL) - abort(); - - if (waitpid(pid, &rc, 0) != pid) - abort(); - - if (!WIFEXITED(rc) || - WEXITSTATUS(rc) != 0) - abort(); - - if (strnlen(str, len) == len) - 
str[len - 1] = 0; - - if (str[strlen(str) - 1] == '\n') - str[strlen(str) - 1] = 0; - - fclose(f); - } -} - -int -main (int argc, char **argv) -{ - char unameinfo[80]; - char gccinfo[80]; - - system_string("uname -a", unameinfo, sizeof(unameinfo)); - system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - - printf ("void vibnal_assert_wire_constants (void)\n" - "{\n" - " /* Wire protocol assertions generated by 'wirecheck'\n" - " * running on %s\n" - " * with %s */\n" - "\n", unameinfo, gccinfo); - - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (IBNAL_MSG_MAGIC); - CHECK_DEFINE (IBNAL_MSG_VERSION); - - CHECK_DEFINE (IBNAL_MSG_CONNREQ); - CHECK_DEFINE (IBNAL_MSG_CONNACK); - CHECK_DEFINE (IBNAL_MSG_NOOP); - CHECK_DEFINE (IBNAL_MSG_IMMEDIATE); - CHECK_DEFINE (IBNAL_MSG_PUT_REQ); - CHECK_DEFINE (IBNAL_MSG_PUT_NAK); - CHECK_DEFINE (IBNAL_MSG_PUT_ACK); - CHECK_DEFINE (IBNAL_MSG_PUT_DONE); - CHECK_DEFINE (IBNAL_MSG_GET_REQ); - CHECK_DEFINE (IBNAL_MSG_GET_DONE); - - CHECK_DEFINE (IBNAL_REJECT_CONN_RACE); - CHECK_DEFINE (IBNAL_REJECT_NO_RESOURCES); - CHECK_DEFINE (IBNAL_REJECT_FATAL); - - CHECK_STRUCT (kib_connparams_t); - CHECK_MEMBER (kib_connparams_t, ibcp_queue_depth); - CHECK_MEMBER (kib_connparams_t, ibcp_max_msg_size); - CHECK_MEMBER (kib_connparams_t, ibcp_max_frags); - - CHECK_STRUCT (kib_immediate_msg_t); - CHECK_MEMBER (kib_immediate_msg_t, ibim_hdr); - CHECK_MEMBER (kib_immediate_msg_t, ibim_payload[13]); - - CHECK_DEFINE (IBNAL_USE_FMR); -#if IBNAL_USE_FMR - CHECK_STRUCT (kib_rdma_desc_t); - CHECK_MEMBER (kib_rdma_desc_t, rd_addr); - CHECK_MEMBER (kib_rdma_desc_t, rd_nob); - CHECK_MEMBER (kib_rdma_desc_t, rd_key); -#else - CHECK_STRUCT (kib_rdma_frag_t); - CHECK_MEMBER (kib_rdma_frag_t, rf_nob); - CHECK_MEMBER (kib_rdma_frag_t, rf_addr_lo); - CHECK_MEMBER (kib_rdma_frag_t, rf_addr_hi); - - CHECK_STRUCT (kib_rdma_desc_t); - CHECK_MEMBER (kib_rdma_desc_t, rd_key); - CHECK_MEMBER (kib_rdma_desc_t, rd_nfrag); - CHECK_MEMBER 
(kib_rdma_desc_t, rd_frags[13]); -#endif - CHECK_STRUCT (kib_putreq_msg_t); - CHECK_MEMBER (kib_putreq_msg_t, ibprm_hdr); - CHECK_MEMBER (kib_putreq_msg_t, ibprm_cookie); - - CHECK_STRUCT (kib_putack_msg_t); - CHECK_MEMBER (kib_putack_msg_t, ibpam_src_cookie); - CHECK_MEMBER (kib_putack_msg_t, ibpam_dst_cookie); - CHECK_MEMBER (kib_putack_msg_t, ibpam_rd); - - CHECK_STRUCT (kib_get_msg_t); - CHECK_MEMBER (kib_get_msg_t, ibgm_hdr); - CHECK_MEMBER (kib_get_msg_t, ibgm_cookie); - CHECK_MEMBER (kib_get_msg_t, ibgm_rd); - - CHECK_STRUCT (kib_completion_msg_t); - CHECK_MEMBER (kib_completion_msg_t, ibcm_cookie); - CHECK_MEMBER (kib_completion_msg_t, ibcm_status); - - CHECK_STRUCT (kib_msg_t); - CHECK_MEMBER (kib_msg_t, ibm_magic); - CHECK_MEMBER (kib_msg_t, ibm_version); - CHECK_MEMBER (kib_msg_t, ibm_type); - CHECK_MEMBER (kib_msg_t, ibm_credits); - CHECK_MEMBER (kib_msg_t, ibm_nob); - CHECK_MEMBER (kib_msg_t, ibm_cksum); - CHECK_MEMBER (kib_msg_t, ibm_srcnid); - CHECK_MEMBER (kib_msg_t, ibm_srcstamp); - CHECK_MEMBER (kib_msg_t, ibm_dstnid); - CHECK_MEMBER (kib_msg_t, ibm_dststamp); - CHECK_MEMBER (kib_msg_t, ibm_seq); - CHECK_MEMBER (kib_msg_t, ibm_u.connparams); - CHECK_MEMBER (kib_msg_t, ibm_u.immediate); - CHECK_MEMBER (kib_msg_t, ibm_u.putreq); - CHECK_MEMBER (kib_msg_t, ibm_u.putack); - CHECK_MEMBER (kib_msg_t, ibm_u.get); - CHECK_MEMBER (kib_msg_t, ibm_u.completion); - - printf ("}\n\n"); - - return (0); -} diff --git a/lnet/libcfs/.cvsignore b/lnet/libcfs/.cvsignore deleted file mode 100644 index c6f0aa426764bcd9e528d8671ea178383befe151..0000000000000000000000000000000000000000 --- a/lnet/libcfs/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -link-stamp -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/libcfs/Info.plist b/lnet/libcfs/Info.plist deleted file mode 100644 index aaf9b2f1aa2bc5a86befc758b17ec1e0f4133ba3..0000000000000000000000000000000000000000 --- a/lnet/libcfs/Info.plist +++ 
/dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>libcfs</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.libcfs</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - <string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - <key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - <string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - </dict> -</dict> -</plist> diff --git a/lnet/libcfs/Makefile.in b/lnet/libcfs/Makefile.in deleted file mode 100644 index 0940a567a9c738be60ace0caee07296cfb832caf..0000000000000000000000000000000000000000 --- a/lnet/libcfs/Makefile.in +++ /dev/null @@ -1,33 +0,0 @@ -MODULES = libcfs - -libcfs-linux-objs := linux-tracefile.o linux-debug.o -libcfs-linux-objs += linux-prim.o linux-mem.o -libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o -libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o -libcfs-linux-objs += linux-utils.o linux-module.o - -ifeq ($(PATCHLEVEL),6) -libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) -endif - -default: all - -ifeq (@linux25@,no) -sources: - @for i in $(libcfs-linux-objs:%.o=%.c) ; do \ - echo "ln -s @srcdir@/linux/$$i ." ; \ - ln -sf @srcdir@/linux/$$i . 
|| exit 1 ; \ - done - -else -sources: - -endif - -libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o - -libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) - -EXTRA_PRE_CFLAGS := -I@LUSTRE@/../lnet/libcfs - -@INCLUDE_RULES@ diff --git a/lnet/libcfs/autoMakefile.am b/lnet/libcfs/autoMakefile.am deleted file mode 100644 index e70e5cefea0c8ba00133ab95ec8dbf889c5c7baa..0000000000000000000000000000000000000000 --- a/lnet/libcfs/autoMakefile.am +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (C) 2001, 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -SUBDIRS := linux -if DARWIN -SUBDIRS += darwin -endif -DIST_SUBDIRS := $(SUBDIRS) - -if LIBLUSTRE -noinst_LIBRARIES= libcfs.a -libcfs_a_SOURCES= debug.c user-prim.c user-lock.c user-tcpip.c user-bitops.c -libcfs_a_CPPFLAGS = $(LLCPPFLAGS) -libcfs_a_CFLAGS = $(LLCFLAGS) -endif - -if MODULES - -if LINUX -modulenet_DATA := libcfs$(KMODEXT) -endif - -if DARWIN -macos_PROGRAMS := libcfs - -nodist_libcfs_SOURCES := darwin/darwin-sync.c darwin/darwin-mem.c \ - darwin/darwin-prim.c darwin/darwin-fs.c darwin/darwin-curproc.c \ - darwin/darwin-tcpip.c darwin/darwin-utils.c \ - darwin/darwin-debug.c darwin/darwin-proc.c \ - darwin/darwin-tracefile.c darwin/darwin-module.c \ - debug.c module.c tracefile.c nidstrings.c watchdog.c - -libcfs_CFLAGS := $(EXTRA_KCFLAGS) -libcfs_LDFLAGS := $(EXTRA_KLDFLAGS) -libcfs_LDADD := $(EXTRA_KLIBS) - -plist_DATA := Info.plist - -install_data_hook := fix-kext-ownership - -endif - -endif - -install-data-hook: $(install_data_hook) - -EXTRA_DIST := Info.plist - -MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs -DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c \ - user-lock.c user-tcpip.c user-bitops.c diff --git a/lnet/libcfs/darwin/.cvsignore b/lnet/libcfs/darwin/.cvsignore deleted file mode 100644 index 
282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/libcfs/darwin/Makefile.am b/lnet/libcfs/darwin/Makefile.am deleted file mode 100644 index 3f2077b753119df6003403e95bb7f6217bb139b1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -EXTRA_DIST := \ - darwin-mem.c \ - darwin-proc.c \ - darwin-utils.c \ - darwin-debug.c \ - darwin-module.c \ - darwin-sync.c \ - darwin-fs.c \ - darwin-prim.c \ - darwin-tracefile.c \ - darwin-curproc.c \ - darwin-tcpip.c diff --git a/lnet/libcfs/darwin/darwin-curproc.c b/lnet/libcfs/darwin/darwin-curproc.c deleted file mode 100644 index e12394e4a6f9fc144c8bd5956b23e185397f7e8a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-curproc.c +++ /dev/null @@ -1,164 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre curproc API implementation for XNU kernel - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - * Public License for more details. You should have received a copy of the GNU - * General Public License along with Lustre; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Implementation of cfs_curproc API (see lnet/include/libcfs/curproc.h) - * for XNU kernel. - */ - -static inline struct ucred *curproc_ucred(void) -{ -#ifdef __DARWIN8__ - return proc_ucred(current_proc()); -#else - return current_proc()->p_cred->pc_ucred; -#endif -} - -uid_t cfs_curproc_uid(void) -{ - return curproc_ucred()->cr_uid; -} - -gid_t cfs_curproc_gid(void) -{ - LASSERT(curproc_ucred()->cr_ngroups > 0); - return curproc_ucred()->cr_groups[0]; -} - -uid_t cfs_curproc_fsuid(void) -{ -#ifdef __DARWIN8__ - return curproc_ucred()->cr_ruid; -#else - return current_proc()->p_cred->p_ruid; -#endif -} - -gid_t cfs_curproc_fsgid(void) -{ -#ifdef __DARWIN8__ - return curproc_ucred()->cr_rgid; -#else - return current_proc()->p_cred->p_rgid; -#endif -} - -pid_t cfs_curproc_pid(void) -{ -#ifdef __DARWIN8__ - /* no pid for each thread, return address of thread struct */ - return (pid_t)current_thread(); -#else - return current_proc()->p_pid; -#endif -} - -int cfs_curproc_groups_nr(void) -{ - LASSERT(curproc_ucred()->cr_ngroups > 0); - return curproc_ucred()->cr_ngroups - 1; -} - -int cfs_curproc_is_in_groups(gid_t gid) -{ - int i; - struct ucred *cr; - - cr = curproc_ucred(); - LASSERT(cr != NULL); - - for (i = 0; i < cr->cr_ngroups; ++ i) { - if (cr->cr_groups[i] == gid) - return 1; - } - return 0; -} - -void cfs_curproc_groups_dump(gid_t *array, int size) -{ - struct ucred *cr; - - cr = curproc_ucred(); - LASSERT(cr != NULL); - CLASSERT(sizeof array[0] == sizeof (__u32)); - - size = min_t(int, size, cr->cr_ngroups); - memcpy(array, &cr->cr_groups[1], size * sizeof(gid_t)); -} - -mode_t cfs_curproc_umask(void) -{ -#ifdef __DARWIN8__ - /* - * XXX Liang: - * - * fd_cmask is not available in kexts, so we just assume - * verything is permited. 
- */ - return -1; -#else - return current_proc()->p_fd->fd_cmask; -#endif -} - -char *cfs_curproc_comm(void) -{ -#ifdef __DARWIN8__ - /* - * Writing to proc->p_comm is not permited in Darwin8, - * because proc_selfname() only return a copy of proc->p_comm, - * so this function is not really working while user try to - * change comm of current process. - */ - static char pcomm[MAXCOMLEN+1]; - - proc_selfname(pcomm, MAXCOMLEN+1); - return pcomm; -#else - return current_proc()->p_comm; -#endif -} - -cfs_kernel_cap_t cfs_curproc_cap_get(void) -{ - return -1; -} - -void cfs_curproc_cap_set(cfs_kernel_cap_t cap) -{ - return; -} - - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/darwin/darwin-debug.c b/lnet/libcfs/darwin/darwin-debug.c deleted file mode 100644 index 2152d4052916d37c5bdf7c02bcf526f5c75fcd8a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-debug.c +++ /dev/null @@ -1,77 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -void libcfs_debug_dumpstack(cfs_task_t *tsk) -{ - return; -} - -void libcfs_run_lbug_upcall(char *file, const char *fn, const int line) -{ -} - -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - CEMERG("LBUG: pid: %u thread: %#x\n", - (unsigned)cfs_curproc_pid(), (unsigned)current_thread()); - libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(file, func, line); - while (1) - cfs_schedule(); - - /* panic("lbug_with_loc(%s, %s, %d)", file, func, line) */ -} - -#if ENTRY_NESTING_SUPPORT - -static inline struct cfs_debug_data *__current_cdd(void) -{ - struct cfs_debug_data *cdd; - - cdd = (struct cfs_debug_data *)current_uthread()->uu_nlminfo; - if (cdd != NULL && - cdd->magic1 == CDD_MAGIC1 && cdd->magic2 == CDD_MAGIC2 && - cdd->nesting_level < 1000) - return cdd; - else - return 
NULL; -} - -static inline void __current_cdd_set(struct cfs_debug_data *cdd) -{ - current_uthread()->uu_nlminfo = (void *)cdd; -} - -void __entry_nesting(struct cfs_debug_data *child) -{ - struct cfs_debug_data *parent; - - parent = __current_cdd(); - if (parent != NULL) { - child->parent = parent; - child->nesting_level = parent->nesting_level + 1; - } - __current_cdd_set(child); -} - -void __exit_nesting(struct cfs_debug_data *child) -{ - __current_cdd_set(child->parent); -} - -unsigned int __current_nesting_level(void) -{ - struct cfs_debug_data *cdd; - - cdd = __current_cdd(); - if (cdd != NULL) - return cdd->nesting_level; - else - return 0; -} -/* ENTRY_NESTING_SUPPORT */ -#endif diff --git a/lnet/libcfs/darwin/darwin-fs.c b/lnet/libcfs/darwin/darwin-fs.c deleted file mode 100644 index 6fce8d54687783b91dcc18ba0e9bbd6783409bb9..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-fs.c +++ /dev/null @@ -1,451 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/malloc.h> -#include <sys/conf.h> -#include <sys/mount.h> -#include <sys/uio.h> -#include <sys/filedesc.h> -#include <sys/namei.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Kernel APIs for file system in xnu - * - * Public functions - */ - -#ifdef __DARWIN8__ -#include <sys/vnode.h> - -extern int vn_rdwr(enum uio_rw, vnode_t, caddr_t, int, off_t, enum uio_seg, int, kauth_cred_t, int *, proc_t); - -/* vnode_size() is not exported */ -static errno_t -vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) -{ - struct vnode_attr va; - int error; - - VATTR_INIT(&va); - VATTR_WANTED(&va, va_data_size); - error = vnode_getattr(vp, &va, ctx); - if (!error) - *sizep = va.va_data_size; - return(error); -} - -/* - * XXX Liang: - * - * kern_file_*() are not safe for multi-threads now, - * however, we need them only for tracefiled, so it's - * not so important to implement for MT. 
- */ -int -kern_file_size(struct cfs_kern_file *fp, off_t *psize) -{ - int error; - off_t size; - - error = vnode_size(fp->f_vp, &size, fp->f_ctxt); - if (error) - return error; - - if (psize) - *psize = size; - return 0; -} - -struct cfs_kern_file * -kern_file_open(const char * filename, int uflags, int mode, int *err) -{ - struct cfs_kern_file *fp; - vnode_t vp; - int error; - - fp = (struct cfs_kern_file *)_MALLOC(sizeof(struct cfs_kern_file), M_TEMP, M_WAITOK); - if (fp == NULL) { - if (err != NULL) - *err = -ENOMEM; - return NULL; - } - fp->f_flags = FFLAGS(uflags); - fp->f_ctxt = vfs_context_create(NULL); - - if ((error = vnode_open(filename, fp->f_flags, - mode, 0, &vp, fp->f_ctxt))){ - if (err != NULL) - *err = -error; - _FREE(fp, M_TEMP); - } else { - if (err != NULL) - *err = 0; - fp->f_vp = vp; - } - - return fp; -} - -int -kern_file_close(struct cfs_kern_file *fp) -{ - vnode_close(fp->f_vp, fp->f_flags, fp->f_ctxt); - vfs_context_rele(fp->f_ctxt); - _FREE(fp, M_TEMP); - - return 0; -} - -int -kern_file_read(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos) -{ - struct proc *p = current_proc(); - int resid; - int error; - - assert(buf != NULL); - assert(fp != NULL && fp->f_vp != NULL); - - error = vn_rdwr(UIO_READ, fp->f_vp, buf, nbytes, *pos, - UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p); - if ((error) || (nbytes == resid)) { - if (!error) - error = -EINVAL; - return error; - } - *pos += nbytes - resid; - - return (int)(nbytes - resid); -} - -int -kern_file_write(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos) -{ - struct proc *p = current_proc(); - int resid; - int error; - - assert(buf != NULL); - assert(fp != NULL && fp->f_vp != NULL); - - error = vn_rdwr(UIO_WRITE, fp->f_vp, buf, nbytes, *pos, - UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p); - if ((error) || (nbytes == resid)) { - if (!error) - error = -EINVAL; - return error; - } - *pos += nbytes - resid; - - return (int)(nbytes - 
resid); - -} - -int -kern_file_sync (struct cfs_kern_file *fp) -{ - return VNOP_FSYNC(fp->f_vp, MNT_WAIT, fp->f_ctxt); -} - -#else /* !__DARWIN8__ */ - -int -kern_file_size(struct file *fp, off_t *size) -{ - struct vnode *vp = (struct vnode *)fp->f_data; - struct stat sb; - int rc; - - rc = vn_stat(vp, &sb, current_proc()); - if (rc) { - *size = 0; - return rc; - } - *size = sb.st_size; - return 0; -} - -cfs_file_t * -kern_file_open(const char * filename, int flags, int mode, int *err) -{ - struct nameidata nd; - cfs_file_t *fp; - register struct vnode *vp; - int rc; - extern struct fileops vnops; - extern int nfiles; - CFS_DECL_CONE_DATA; - - CFS_CONE_IN; - nfiles++; - MALLOC_ZONE(fp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO); - bzero(fp, sizeof(cfs_file_t)); - fp->f_count = 1; - LIST_CIRCLE(fp, f_list); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc()); - if ((rc = vn_open(&nd, flags, mode)) != 0){ - printf("filp_open failed at (%d)\n", rc); - if (err != NULL) - *err = rc; - FREE_ZONE(fp, sizeof *fp, M_FILE); - CFS_CONE_EX; - return NULL; - } - vp = nd.ni_vp; - fp->f_flag = flags & FMASK; - fp->f_type = DTYPE_VNODE; - fp->f_ops = &vnops; - fp->f_data = (caddr_t)vp; - fp->f_cred = current_proc()->p_ucred; - /* - * Hold cred to increase reference - */ - crhold(fp->f_cred); - /* - * vnode is locked inside vn_open for lookup, - * we should release the lock before return - */ - VOP_UNLOCK(vp, 0, current_proc()); - CFS_CONE_EX; - - return fp; -} - -static int -frele_internal(cfs_file_t *fp) -{ - if (fp->f_count == (short)0xffff) - panic("frele of lustre: stale"); - if (--fp->f_count < 0) - panic("frele of lustre: count < 0"); - return ((int)fp->f_count); -} - -int -kern_file_close (cfs_file_t *fp) -{ - struct vnode *vp; - CFS_DECL_CONE_DATA; - - if (fp == NULL) - return 0; - - CFS_CONE_IN; - if (frele_internal(fp) > 0) - goto out; - vp = (struct vnode *)fp->f_data; - (void )vn_close(vp, fp->f_flag, fp->f_cred, 
current_proc()); - /* - * ffree(fp); - * Dont use ffree to release fp!!!! - * ffree will call LIST_REMOVE(fp), - * but fp is not in any list, this will - * cause kernel panic - */ - struct ucred *cred; - cred = fp->f_cred; - if (cred != NOCRED) { - fp->f_cred = NOCRED; - crfree(cred); - } - extern int nfiles; - nfiles--; - memset(fp, 0xff, sizeof *fp); - fp->f_count = (short)0xffff; - FREE_ZONE(fp, sizeof *fp, M_FILE); -out: - CFS_CONE_EX; - return 0; -} - -extern void bwillwrite(void); - -/* - * Write buffer to filp inside kernel - */ -int -kern_file_write (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos) -{ - struct uio auio; - struct iovec aiov; - struct proc *p = current_proc(); - long cnt, error = 0; - int flags = 0; - CFS_DECL_CONE_DATA; - - aiov.iov_base = (void *)(uintptr_t)buf; - aiov.iov_len = nbyte; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - if (pos != NULL) { - auio.uio_offset = *pos; - /* - * Liang: If don't set FOF_OFFSET, vn_write() - * will use fp->f_offset as the the real offset. 
- * Same in vn_read() - */ - flags |= FOF_OFFSET; - } else - auio.uio_offset = (off_t)-1; - if (nbyte > INT_MAX) - return (EINVAL); - auio.uio_resid = nbyte; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = p; - - cnt = nbyte; - CFS_CONE_IN; - if (fp->f_type == DTYPE_VNODE) - bwillwrite(); /* empty stuff now */ - if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { - if (auio.uio_resid != cnt && (error == ERESTART ||\ - error == EINTR || error == EWOULDBLOCK)) - error = 0; - /* The socket layer handles SIGPIPE */ - if (error == EPIPE && fp->f_type != DTYPE_SOCKET) - psignal(p, SIGPIPE); - } - CFS_CONE_EX; - if (error != 0) - cnt = -error; - else - cnt -= auio.uio_resid; - if (pos != NULL) - *pos += cnt; - return cnt; -} - -/* - * Read from filp inside kernel - */ -int -kern_file_read (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos) -{ - struct uio auio; - struct iovec aiov; - struct proc *p = current_proc(); - long cnt, error = 0; - int flags = 0; - CFS_DECL_CONE_DATA; - - aiov.iov_base = (caddr_t)buf; - aiov.iov_len = nbyte; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - if (pos != NULL) { - auio.uio_offset = *pos; - flags |= FOF_OFFSET; - } else - auio.uio_offset = (off_t)-1; - if (nbyte > INT_MAX) - return (EINVAL); - auio.uio_resid = nbyte; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = p; - - cnt = nbyte; - CFS_CONE_IN; - if ((error = fo_read(fp, &auio, fp->f_cred, flags, p)) != 0) { - if (auio.uio_resid != cnt && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) - error = 0; - } - CFS_CONE_EX; - if (error != 0) - cnt = -error; - else - cnt -= auio.uio_resid; - if (pos != NULL) - *pos += cnt; - - return cnt; -} - -int -kern_file_sync (cfs_file_t *fp) -{ - struct vnode *vp = (struct vnode *)fp->f_data; - struct proc *p = current_proc(); - int error = 0; - CFS_DECL_CONE_DATA; - - CFS_CONE_IN; - if (fref(fp) == -1) { - CFS_CONE_EX; - return (-EBADF); - } - vn_lock(vp, 
LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); - VOP_UNLOCK(vp, 0, p); - frele(fp); - CFS_CONE_EX; - - return error; -} - -#endif /* !__DARWIN8__ */ - -struct posix_acl *posix_acl_alloc(int count, int flags) -{ - static struct posix_acl acl; - return &acl; -} - -/* - * XXX Liang: I've not converted all of them, - * more is needed? - */ -int cfs_oflags2univ(int flags) -{ - int f; - - f = flags & O_ACCMODE; - f |= (flags & O_CREAT) ? CFS_O_CREAT: 0; - f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0; - f |= (flags & O_EXCL) ? CFS_O_EXCL: 0; - f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0; - f |= (flags & O_APPEND) ? CFS_O_APPEND: 0; - f |= (flags & O_NOFOLLOW) ? CFS_O_NOFOLLOW: 0; - f |= (flags & O_SYNC)? CFS_O_SYNC: 0; - return f; -} - -/* - * XXX Liang: we don't need it in OSX. - * But it should be implemented anyway. - */ -int cfs_univ2oflags(int flags) -{ - return flags; -} diff --git a/lnet/libcfs/darwin/darwin-internal.h b/lnet/libcfs/darwin/darwin-internal.h deleted file mode 100644 index 6c83577cd4420efee050b36dee8925f78a760585..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-internal.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __LIBCFS_DARWIN_INTERNAL_H__ -#define __LIBCFS_DARWIN_INTERNAL_H__ - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/systm.h> -#include <sys/sysctl.h> - -int cfs_sysctl_isvalid(void); -struct sysctl_oid *cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int (*handler) SYSCTL_HANDLER_ARGS); -struct sysctl_oid *cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int n, - const char *name, int *ptr, int val); -struct sysctl_oid * cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int *ptr, int val); -struct sysctl_oid * cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, char *ptr, int len); -struct sysctl_oid * 
cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, void *ptr, int size); - -#endif diff --git a/lnet/libcfs/darwin/darwin-mem.c b/lnet/libcfs/darwin/darwin-mem.c deleted file mode 100644 index 3079a56e95bbe298177691afc432865f9d0143c0..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-mem.c +++ /dev/null @@ -1,480 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Liang Zhen <liangzhen@clusterfs.com> - * Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <mach/mach_types.h> -#include <string.h> -#include <sys/malloc.h> - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "darwin-internal.h" - -#if CFS_INDIVIDUAL_ZONE -extern zone_t zinit( vm_size_t, vm_size_t, vm_size_t, const char *); -extern void * zalloc(zone_t zone); -extern void *zalloc_noblock(zone_t zone); -extern void zfree(zone_t zone, void *addr); - -struct cfs_zone_nob { - struct list_head *z_nob; /* Pointer to z_link */ - struct list_head z_link; /* Do NOT access it directly */ -}; - -static struct cfs_zone_nob cfs_zone_nob; -static spinlock_t cfs_zone_guard; - -cfs_mem_cache_t *mem_cache_find(const char *name, size_t objsize) -{ - cfs_mem_cache_t *walker = NULL; - - LASSERT(cfs_zone_nob.z_nob != NULL); - - spin_lock(&cfs_zone_guard); - list_for_each_entry(walker, cfs_zone_nob.z_nob, mc_link) { - if (!strcmp(walker->mc_name, name) && \ - walker->mc_size == objsize) - break; - } - spin_unlock(&cfs_zone_guard); - - return walker; -} - -/* - * our wrapper around kern/zalloc.c:zinit() - * - * Creates copy of name and calls zinit() to do real work. Needed because zone - * survives kext unloading, so that @name cannot be just static string - * embedded into kext image. 
- */ -cfs_mem_cache_t *mem_cache_create(vm_size_t objsize, const char *name) -{ - cfs_mem_cache_t *mc = NULL; - char *cname; - - MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO); - if (mc == NULL){ - CERROR("cfs_mem_cache created fail!\n"); - return NULL; - } - - cname = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK); - LASSERT(cname != NULL); - mc->mc_cache = zinit(objsize, (KMEM_MAX_ZONE * objsize), 0, strcpy(cname, name)); - mc->mc_size = objsize; - CFS_INIT_LIST_HEAD(&mc->mc_link); - strncpy(mc->mc_name, name, 1 + strlen(name)); - return mc; -} - -void mem_cache_destroy(cfs_mem_cache_t *mc) -{ - /* - * zone can NOT be destroyed after creating, - * so just keep it in list. - * - * We will not lost a zone after we unload - * libcfs, it can be found by from libcfs.zone - */ - return; -} - -#define mem_cache_alloc(mc) zalloc((mc)->mc_cache) -#ifdef __DARWIN8__ -# define mem_cache_alloc_nb(mc) zalloc((mc)->mc_cache) -#else -/* XXX Liang: Tiger doesn't export zalloc_noblock() */ -# define mem_cache_alloc_nb(mc) zalloc_noblock((mc)->mc_cache) -#endif -#define mem_cache_free(mc, p) zfree((mc)->mc_cache, p) - -#else /* !CFS_INDIVIDUAL_ZONE */ - -cfs_mem_cache_t * -mem_cache_find(const char *name, size_t objsize) -{ - return NULL; -} - -cfs_mem_cache_t *mem_cache_create(vm_size_t size, const char *name) -{ - cfs_mem_cache_t *mc = NULL; - - MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO); - if (mc == NULL){ - CERROR("cfs_mem_cache created fail!\n"); - return NULL; - } - mc->mc_cache = OSMalloc_Tagalloc(name, OSMT_DEFAULT); - mc->mc_size = size; - return mc; -} - -void mem_cache_destroy(cfs_mem_cache_t *mc) -{ - OSMalloc_Tagfree(mc->mc_cache); - FREE(mc, M_TEMP); -} - -#define mem_cache_alloc(mc) OSMalloc((mc)->mc_size, (mc)->mc_cache) -#define mem_cache_alloc_nb(mc) OSMalloc_noblock((mc)->mc_size, (mc)->mc_cache) -#define mem_cache_free(mc, p) OSFree(p, (mc)->mc_size, (mc)->mc_cache) - -#endif /* 
!CFS_INDIVIDUAL_ZONE */ - -cfs_mem_cache_t * -cfs_mem_cache_create (const char *name, - size_t objsize, size_t off, unsigned long arg1) -{ - cfs_mem_cache_t *mc; - - mc = mem_cache_find(name, objsize); - if (mc) - return mc; - mc = mem_cache_create(objsize, name); - return mc; -} - -int cfs_mem_cache_destroy (cfs_mem_cache_t *cachep) -{ - mem_cache_destroy(cachep); - return 0; -} - -void *cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags) -{ - void *result; - - /* zalloc_canblock() is not exported... Emulate it. */ - if (flags & CFS_ALLOC_ATOMIC) { - result = (void *)mem_cache_alloc_nb(cachep); - } else { - LASSERT(get_preemption_level() == 0); - result = (void *)mem_cache_alloc(cachep); - } - if (result != NULL && (flags & CFS_ALLOC_ZERO)) - memset(result, 0, cachep->mc_size); - - return result; -} - -void cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp) -{ - mem_cache_free(cachep, objp); -} - -/* --------------------------------------------------------------------------- - * Page operations - * - * --------------------------------------------------------------------------- */ - -/* - * "Raw" pages - */ - -static unsigned int raw_pages = 0; -static cfs_mem_cache_t *raw_page_cache = NULL; - -static struct xnu_page_ops raw_page_ops; -static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = { - [XNU_PAGE_RAW] = &raw_page_ops -}; - -#if defined(LIBCFS_DEBUG) -static int page_type_is_valid(cfs_page_t *page) -{ - LASSERT(page != NULL); - return 0 <= page->type && page->type < XNU_PAGE_NTYPES; -} - -static int page_is_raw(cfs_page_t *page) -{ - return page->type == XNU_PAGE_RAW; -} -#endif - -static struct xnu_raw_page *as_raw(cfs_page_t *page) -{ - LASSERT(page_is_raw(page)); - return list_entry(page, struct xnu_raw_page, header); -} - -static void *raw_page_address(cfs_page_t *pg) -{ - return (void *)as_raw(pg)->virtual; -} - -static void *raw_page_map(cfs_page_t *pg) -{ - return (void *)as_raw(pg)->virtual; -} - -static void raw_page_unmap(cfs_page_t *pg) 
-{ -} - -static struct xnu_page_ops raw_page_ops = { - .page_map = raw_page_map, - .page_unmap = raw_page_unmap, - .page_address = raw_page_address -}; - -extern int get_preemption_level(void); - -struct list_head page_death_row; -spinlock_t page_death_row_phylax; - -static void raw_page_finish(struct xnu_raw_page *pg) -{ - -- raw_pages; - if (pg->virtual != NULL) - cfs_mem_cache_free(raw_page_cache, pg->virtual); - cfs_free(pg); -} - -void raw_page_death_row_clean(void) -{ - struct xnu_raw_page *pg; - - spin_lock(&page_death_row_phylax); - while (!list_empty(&page_death_row)) { - pg = container_of(page_death_row.next, - struct xnu_raw_page, link); - list_del(&pg->link); - spin_unlock(&page_death_row_phylax); - raw_page_finish(pg); - spin_lock(&page_death_row_phylax); - } - spin_unlock(&page_death_row_phylax); -} - -/* Free a "page" */ -void free_raw_page(struct xnu_raw_page *pg) -{ - if (!atomic_dec_and_test(&pg->count)) - return; - /* - * kmem_free()->vm_map_remove()->vm_map_delete()->lock_write() may - * block. (raw_page_done()->upl_abort() can block too) On the other - * hand, cfs_free_page() may be called in non-blockable context. To - * work around this, park pages on global list when cannot block. - */ - if (get_preemption_level() > 0) { - spin_lock(&page_death_row_phylax); - list_add(&pg->link, &page_death_row); - spin_unlock(&page_death_row_phylax); - } else { - raw_page_finish(pg); - raw_page_death_row_clean(); - } -} - -cfs_page_t *cfs_alloc_page(u_int32_t flags) -{ - struct xnu_raw_page *page; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - page = cfs_alloc(sizeof *page, flags); - if (page != NULL) { - page->virtual = cfs_mem_cache_alloc(raw_page_cache, flags); - if (page->virtual != NULL) { - ++ raw_pages; - page->header.type = XNU_PAGE_RAW; - atomic_set(&page->count, 1); - } else { - cfs_free(page); - page = NULL; - } - } - return page != NULL ? 
&page->header : NULL; -} - -void cfs_free_page(cfs_page_t *pages) -{ - free_raw_page(as_raw(pages)); -} - -void cfs_get_page(cfs_page_t *p) -{ - atomic_inc(&as_raw(p)->count); -} - -int cfs_put_page_testzero(cfs_page_t *p) -{ - return atomic_dec_and_test(&as_raw(p)->count); -} - -int cfs_page_count(cfs_page_t *p) -{ - return atomic_read(&as_raw(p)->count); -} - -/* - * Generic page operations - */ - -void *cfs_page_address(cfs_page_t *pg) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - LASSERT(page_type_is_valid(pg)); - return page_ops[pg->type]->page_address(pg); -} - -void *cfs_kmap(cfs_page_t *pg) -{ - LASSERT(page_type_is_valid(pg)); - return page_ops[pg->type]->page_map(pg); -} - -void cfs_kunmap(cfs_page_t *pg) -{ - LASSERT(page_type_is_valid(pg)); - return page_ops[pg->type]->page_unmap(pg); -} - -void xnu_page_ops_register(int type, struct xnu_page_ops *ops) -{ - LASSERT(0 <= type && type < XNU_PAGE_NTYPES); - LASSERT(ops != NULL); - LASSERT(page_ops[type] == NULL); - - page_ops[type] = ops; -} - -void xnu_page_ops_unregister(int type) -{ - LASSERT(0 <= type && type < XNU_PAGE_NTYPES); - LASSERT(page_ops[type] != NULL); - - page_ops[type] = NULL; -} - -/* - * Portable memory allocator API - */ -#ifdef HAVE_GET_PREEMPTION_LEVEL -extern int get_preemption_level(void); -#else -#define get_preemption_level() (0) -#endif - -void *cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - int mflags; - - mflags = 0; - if (flags & CFS_ALLOC_ATOMIC) { - mflags |= M_NOWAIT; - } else { - LASSERT(get_preemption_level() == 0); - mflags |= M_WAITOK; - } - - if (flags & CFS_ALLOC_ZERO) - mflags |= M_ZERO; - - return _MALLOC(nr_bytes, M_TEMP, mflags); -} - -void cfs_free(void *addr) -{ - return _FREE(addr, M_TEMP); -} - -void *cfs_alloc_large(size_t nr_bytes) -{ - LASSERT(get_preemption_level() == 0); - return _MALLOC(nr_bytes, M_TEMP, M_WAITOK); -} - -void cfs_free_large(void *addr) -{ - 
LASSERT(get_preemption_level() == 0); - return _FREE(addr, M_TEMP); -} - -/* - * Lookup cfs_zone_nob by sysctl.zone, if it cannot be - * found (first load of * libcfs since boot), allocate - * sysctl libcfs.zone. - */ -int cfs_mem_init(void) -{ -#if CFS_INDIVIDUAL_ZONE - int rc; - size_t len; - - len = sizeof(struct cfs_zone_nob); - rc = sysctlbyname("libcfs.zone", - (void *)&cfs_zone_nob, &len, NULL, 0); - if (rc == ENOENT) { - /* zone_nob is not register in libcfs_sysctl */ - struct cfs_zone_nob *nob; - struct sysctl_oid *oid; - - assert(cfs_sysctl_isvalid()); - - nob = _MALLOC(sizeof(struct cfs_zone_nob), - M_TEMP, M_WAITOK | M_ZERO); - CFS_INIT_LIST_HEAD(&nob->z_link); - nob->z_nob = &nob->z_link; - oid = cfs_alloc_sysctl_struct(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, - "zone", nob, sizeof(struct cfs_zone_nob)); - if (oid == NULL) { - _FREE(nob, M_TEMP); - return -ENOMEM; - } - sysctl_register_oid(oid); - - cfs_zone_nob.z_nob = nob->z_nob; - } - spin_lock_init(&cfs_zone_guard); -#endif - CFS_INIT_LIST_HEAD(&page_death_row); - spin_lock_init(&page_death_row_phylax); - raw_page_cache = cfs_mem_cache_create("raw-page", CFS_PAGE_SIZE, 0, 0); - return 0; -} - -void cfs_mem_fini(void) -{ - raw_page_death_row_clean(); - spin_lock_done(&page_death_row_phylax); - cfs_mem_cache_destroy(raw_page_cache); - -#if CFS_INDIVIDUAL_ZONE - cfs_zone_nob.z_nob = NULL; - spin_lock_done(&cfs_zone_guard); -#endif -} diff --git a/lnet/libcfs/darwin/darwin-module.c b/lnet/libcfs/darwin/darwin-module.c deleted file mode 100644 index 10cb7d842f53df0bafc5c144085a90b36dd6ad72..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-module.c +++ /dev/null @@ -1,191 +0,0 @@ -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/conf.h> -#include <miscfs/devfs/devfs.h> - -#define DEBUG_SUBSYSTEM S_LNET -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -int libcfs_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct 
libcfs_ioctl_hdr *hdr; - struct libcfs_ioctl_data *data; - int err = 0; - ENTRY; - - hdr = (struct libcfs_ioctl_hdr *)buf; - data = (struct libcfs_ioctl_data *)buf; - /* libcfs_ioctl_data has been copied in by ioctl of osx */ - memcpy(buf, arg, sizeof(struct libcfs_ioctl_data)); - - if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { - CERROR("LIBCFS: version mismatch kernel vs application\n"); - RETURN(-EINVAL); - } - - if (hdr->ioc_len + buf >= end) { - CERROR("LIBCFS: user buffer exceeds kernel buffer\n"); - RETURN(-EINVAL); - } - - if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { - CERROR("LIBCFS: user buffer too small for ioctl\n"); - RETURN(-EINVAL); - } - buf += size_round(sizeof(*data)); - - if (data->ioc_inllen1) { - err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1)); - if (err) - RETURN(err); - data->ioc_inlbuf1 = buf; - buf += size_round(data->ioc_inllen1); - } - - if (data->ioc_inllen2) { - copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2)); - if (err) - RETURN(err); - data->ioc_inlbuf2 = buf; - } - - RETURN(err); -} - -int libcfs_ioctl_popdata(void *arg, void *data, int size) -{ - /* - * system call will copy out ioctl arg to user space - */ - memcpy(arg, data, size); - return 0; -} - -extern struct cfs_psdev_ops libcfs_psdev_ops; -struct libcfs_device_userstate *mdev_state[16]; - -static int -libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p) -{ - struct libcfs_device_userstate *mstat = NULL; - int rc = 0; - int devid; - devid = minor(dev); - - if (devid > 16) return (ENXIO); - - if (libcfs_psdev_ops.p_open != NULL) - rc = -libcfs_psdev_ops.p_open(0, &mstat); - else - rc = EPERM; - if (rc == 0) - mdev_state[devid] = mstat; - return rc; -} - -static int -libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p) -{ - int devid; - devid = minor(dev); - int rc = 0; - - if (devid > 16) return (ENXIO); - - if (libcfs_psdev_ops.p_close != NULL) - rc = -libcfs_psdev_ops.p_close(0, 
mdev_state[devid]); - else - rc = EPERM; - if (rc == 0) - mdev_state[devid] = NULL; - return rc; -} - -static int -libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p) -{ - int rc = 0; - struct cfs_psdev_file pfile; - int devid; - devid = minor(dev); - - if (devid > 16) return (ENXIO); - - if (!is_suser()) - return (EPERM); - - pfile.off = 0; - pfile.private_data = mdev_state[devid]; - - if (libcfs_psdev_ops.p_ioctl != NULL) - rc = -libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); - else - rc = EPERM; - return rc; -} - -static struct cdevsw libcfs_devsw = -{ - .d_open = libcfs_psdev_open, - .d_close = libcfs_psdev_close, - .d_read = eno_rdwrt, - .d_write = eno_rdwrt, - .d_ioctl = libcfs_ioctl, - .d_stop = eno_stop, - .d_reset = eno_reset, - .d_ttys = NULL, - .d_select = eno_select, - .d_mmap = eno_mmap, - .d_strategy = eno_strat, - .d_getc = eno_getc, - .d_putc = eno_putc, - .d_type = 0 -}; - -cfs_psdev_t libcfs_dev = { - -1, - NULL, - "lnet", - &libcfs_devsw, - NULL -}; - -extern spinlock_t trace_cpu_serializer; -extern void cfs_sync_init(void); -extern void cfs_sync_fini(void); -extern int cfs_sysctl_init(void); -extern void cfs_sysctl_fini(void); -extern int cfs_mem_init(void); -extern int cfs_mem_fini(void); -extern void raw_page_death_row_clean(void); -extern void cfs_thread_agent_init(void); -extern void cfs_thread_agent_fini(void); -extern void cfs_symbol_init(void); -extern void cfs_symbol_fini(void); - -int libcfs_arch_init(void) -{ - cfs_sync_init(); - cfs_sysctl_init(); - cfs_mem_init(); - cfs_thread_agent_init(); - cfs_symbol_init(); - - spin_lock_init(&trace_cpu_serializer); - - return 0; -} - -void libcfs_arch_cleanup(void) -{ - spin_lock_done(&trace_cpu_serializer); - - cfs_symbol_fini(); - cfs_thread_agent_fini(); - cfs_mem_fini(); - cfs_sysctl_fini(); - cfs_sync_fini(); -} - diff --git a/lnet/libcfs/darwin/darwin-prim.c b/lnet/libcfs/darwin/darwin-prim.c deleted file mode 100644 index 
cdcabd94a6a35a7cc624c664d0e8e5addc821a56..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-prim.c +++ /dev/null @@ -1,581 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/conf.h> -#include <sys/uio.h> -#include <sys/filedesc.h> -#include <sys/namei.h> -#include <miscfs/devfs/devfs.h> -#include <kern/thread.h> - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * cfs pseudo device, actually pseudo char device in darwin - */ -#define KLNET_MAJOR -1 - -kern_return_t cfs_psdev_register(cfs_psdev_t *dev) { - dev->index = cdevsw_add(KLNET_MAJOR, dev->devsw); - if (dev->index < 0) { - printf("libcfs_init: failed to allocate a major number!\n"); - return KERN_FAILURE; - } - dev->handle = devfs_make_node(makedev (dev->index, 0), - DEVFS_CHAR, UID_ROOT, - GID_WHEEL, 0666, (char *)dev->name, 0); - return KERN_SUCCESS; -} - -kern_return_t cfs_psdev_deregister(cfs_psdev_t *dev) { - devfs_remove(dev->handle); - cdevsw_remove(dev->index, dev->devsw); - return KERN_SUCCESS; -} - -/* - * KPortal symbol register / unregister support - */ -struct rw_semaphore cfs_symbol_lock; -struct list_head cfs_symbol_list; - -void * -cfs_symbol_get(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - sym->ref ++; - break; - } - } - up_read(&cfs_symbol_lock); - if (sym != NULL) - return sym->value; - return NULL; -} - -kern_return_t -cfs_symbol_put(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - sym->ref --; - LASSERT(sym->ref >= 0); - break; - } - } - up_read(&cfs_symbol_lock); - LASSERT(sym != NULL); - - return 0; -} - -kern_return_t -cfs_symbol_register(const char 
*name, const void *value) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - struct cfs_symbol *new = NULL; - - MALLOC(new, struct cfs_symbol *, sizeof(struct cfs_symbol), M_TEMP, M_WAITOK|M_ZERO); - strncpy(new->name, name, CFS_SYMBOL_LEN); - new->value = (void *)value; - new->ref = 0; - CFS_INIT_LIST_HEAD(&new->sym_list); - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - up_write(&cfs_symbol_lock); - FREE(new, M_TEMP); - return KERN_NAME_EXISTS; - } - - } - list_add_tail(&new->sym_list, &cfs_symbol_list); - up_write(&cfs_symbol_lock); - - return KERN_SUCCESS; -} - -kern_return_t -cfs_symbol_unregister(const char *name) -{ - struct list_head *walker; - struct list_head *nxt; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each_safe(walker, nxt, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - FREE(sym, M_TEMP); - break; - } - } - up_write(&cfs_symbol_lock); - - return KERN_SUCCESS; -} - -void -cfs_symbol_init() -{ - CFS_INIT_LIST_HEAD(&cfs_symbol_list); - init_rwsem(&cfs_symbol_lock); -} - -void -cfs_symbol_fini() -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - FREE(sym, M_TEMP); - } - up_write(&cfs_symbol_lock); - - fini_rwsem(&cfs_symbol_lock); - return; -} - -struct kernel_thread_arg -{ - spinlock_t lock; - atomic_t inuse; - cfs_thread_t func; - void *arg; -}; - -struct kernel_thread_arg cfs_thread_arg; - -#define THREAD_ARG_FREE 0 -#define THREAD_ARG_HOLD 1 -#define THREAD_ARG_RECV 2 - -#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v) -#define get_targ_stat(a) 
atomic_read(&(a)->inuse) - -/* - * Hold the thread argument and set the status of thread_status - * to THREAD_ARG_HOLD, if the thread argument is held by other - * threads (It's THREAD_ARG_HOLD already), current-thread has to wait. - */ -#define thread_arg_hold(pta, _func, _arg) \ - do { \ - spin_lock(&(pta)->lock); \ - if (get_targ_stat(pta) == THREAD_ARG_FREE) { \ - set_targ_stat((pta), THREAD_ARG_HOLD); \ - (pta)->arg = (void *)_arg; \ - (pta)->func = _func; \ - spin_unlock(&(pta)->lock); \ - break; \ - } \ - spin_unlock(&(pta)->lock); \ - cfs_schedule(); \ - } while(1); \ - -/* - * Release the thread argument if the thread argument has been - * received by the child-thread (Status of thread_args is - * THREAD_ARG_RECV), otherwise current-thread has to wait. - * After release, the thread_args' status will be set to - * THREAD_ARG_FREE, and others can re-use the thread_args to - * create new kernel_thread. - */ -#define thread_arg_release(pta) \ - do { \ - spin_lock(&(pta)->lock); \ - if (get_targ_stat(pta) == THREAD_ARG_RECV) { \ - (pta)->arg = NULL; \ - (pta)->func = NULL; \ - set_targ_stat(pta, THREAD_ARG_FREE); \ - spin_unlock(&(pta)->lock); \ - break; \ - } \ - spin_unlock(&(pta)->lock); \ - cfs_schedule(); \ - } while(1) - -/* - * Receive thread argument (Used in child thread), set the status - * of thread_args to THREAD_ARG_RECV. 
- */ -#define __thread_arg_recv_fin(pta, _func, _arg, fin) \ - do { \ - spin_lock(&(pta)->lock); \ - if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \ - if (fin) \ - set_targ_stat(pta, THREAD_ARG_RECV);\ - _arg = (pta)->arg; \ - _func = (pta)->func; \ - spin_unlock(&(pta)->lock); \ - break; \ - } \ - spin_unlock(&(pta)->lock); \ - cfs_schedule(); \ - } while (1); \ - -/* - * Just set the thread_args' status to THREAD_ARG_RECV - */ -#define thread_arg_fin(pta) \ - do { \ - spin_lock(&(pta)->lock); \ - assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \ - set_targ_stat(pta, THREAD_ARG_RECV); \ - spin_unlock(&(pta)->lock); \ - } while(0) - -#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1) -#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0) - -void -cfs_thread_agent_init(void) -{ - set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE); - spin_lock_init(&cfs_thread_arg.lock); - cfs_thread_arg.arg = NULL; - cfs_thread_arg.func = NULL; -} - -void -cfs_thread_agent_fini(void) -{ - assert(get_targ_stat(&cfs_thread_arg) == THREAD_ARG_FREE); - - spin_lock_done(&cfs_thread_arg.lock); -} - -/* - * - * All requests to create kernel thread will create a new - * thread instance of cfs_thread_agent, one by one. - * cfs_thread_agent will call the caller's thread function - * with argument supplied by caller. - */ -void -cfs_thread_agent (void) -{ - cfs_thread_t func = NULL; - void *arg = NULL; - - thread_arg_recv(&cfs_thread_arg, func, arg); - /* printf("entry of thread agent (func: %08lx).\n", (void *)func); */ - assert(func != NULL); - func(arg); - /* printf("thread agent exit. 
(func: %08lx)\n", (void *)func); */ - (void) thread_terminate(current_thread()); -} - -extern thread_t kernel_thread(task_t task, void (*start)(void)); - -int -cfs_kernel_thread(cfs_thread_t func, void *arg, int flag) -{ - int ret = 0; - thread_t th = NULL; - - thread_arg_hold(&cfs_thread_arg, func, arg); - th = kernel_thread(kernel_task, cfs_thread_agent); - thread_arg_release(&cfs_thread_arg); - if (th == THREAD_NULL) - ret = -1; - return ret; -} - -void cfs_daemonize(char *str) -{ - snprintf(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX, "%s", str); - return; -} - -/* - * XXX Liang: kexts cannot access sigmask in Darwin8. - * it's almost impossible for us to get/set signal mask - * without patching kernel. - * Should we provide these functions in xnu? - * - * These signal functions almost do nothing now, we - * need to investigate more about signal in Darwin. - */ -cfs_sigset_t cfs_get_blockedsigs() -{ - return (cfs_sigset_t)0; -} - -extern int block_procsigmask(struct proc *p, int bit); - -cfs_sigset_t cfs_block_allsigs() -{ - cfs_sigset_t old = 0; -#ifdef __DARWIN8__ -#else - block_procsigmask(current_proc(), -1); -#endif - return old; -} - -cfs_sigset_t cfs_block_sigs(sigset_t bit) -{ - cfs_sigset_t old = 0; -#ifdef __DARWIN8__ -#else - block_procsigmask(current_proc(), bit); -#endif - return old; -} - -void cfs_restore_sigs(cfs_sigset_t old) -{ -} - -int cfs_signal_pending(void) - -{ -#ifdef __DARWIN8__ - extern int thread_issignal(proc_t, thread_t, sigset_t); - return thread_issignal(current_proc(), current_thread(), (sigset_t)-1); -#else - return SHOULDissignal(current_proc(), current_uthread()) -#endif -} - -void cfs_clear_sigpending(void) -{ -#ifdef __DARWIN8__ -#else - clear_procsiglist(current_proc(), -1); -#endif -} - -#ifdef __DARWIN8__ - -#else /* !__DARWIN8__ */ - -void lustre_cone_in(boolean_t *state, funnel_t **cone) -{ - *cone = thread_funnel_get(); - if (*cone == network_flock) - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - else if (*cone 
== NULL) - *state = thread_funnel_set(kernel_flock, TRUE); -} - -void lustre_cone_ex(boolean_t state, funnel_t *cone) -{ - if (cone == network_flock) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - else if (cone == NULL) - (void) thread_funnel_set(kernel_flock, state); -} - -void lustre_net_in(boolean_t *state, funnel_t **cone) -{ - *cone = thread_funnel_get(); - if (*cone == kernel_flock) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - else if (*cone == NULL) - *state = thread_funnel_set(network_flock, TRUE); -} - -void lustre_net_ex(boolean_t state, funnel_t *cone) -{ - if (cone == kernel_flock) - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - else if (cone == NULL) - (void) thread_funnel_set(network_flock, state); -} -#endif /* !__DARWIN8__ */ - -void cfs_waitq_init(struct cfs_waitq *waitq) -{ - ksleep_chan_init(&waitq->wq_ksleep_chan); -} - -void cfs_waitlink_init(struct cfs_waitlink *link) -{ - ksleep_link_init(&link->wl_ksleep_link); -} - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - link->wl_waitq = waitq; - ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); -} - -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link) -{ - link->wl_waitq = waitq; - link->wl_ksleep_link.flags |= KSLEEP_EXCLUSIVE; - ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); -} - -void cfs_waitq_forward(struct cfs_waitlink *link, - struct cfs_waitq *waitq) -{ - link->wl_ksleep_link.forward = &waitq->wq_ksleep_chan; -} - -void cfs_waitq_del(struct cfs_waitq *waitq, - struct cfs_waitlink *link) -{ - ksleep_del(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); -} - -int cfs_waitq_active(struct cfs_waitq *waitq) -{ - return (1); -} - -void cfs_waitq_signal(struct cfs_waitq *waitq) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - ksleep_wake(&waitq->wq_ksleep_chan); -} - -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) -{ - ksleep_wake_nr(&waitq->wq_ksleep_chan, nr); -} - -void cfs_waitq_broadcast(struct cfs_waitq *waitq) -{ - ksleep_wake_all(&waitq->wq_ksleep_chan); -} - -void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state) -{ - ksleep_wait(&link->wl_waitq->wq_ksleep_chan, state); -} - -cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, - cfs_task_state_t state, - cfs_duration_t timeout) -{ - return ksleep_timedwait(&link->wl_waitq->wq_ksleep_chan, - state, timeout); -} - -typedef void (*ktimer_func_t)(void *); -void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg) -{ - ktimer_init(&t->t, (ktimer_func_t)func, arg); -} - -void cfs_timer_done(struct cfs_timer *t) -{ - ktimer_done(&t->t); -} - -void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline) -{ - ktimer_arm(&t->t, deadline); -} - -void cfs_timer_disarm(struct cfs_timer *t) -{ - ktimer_disarm(&t->t); -} - -int cfs_timer_is_armed(struct cfs_timer *t) -{ - return ktimer_is_armed(&t->t); -} - -cfs_time_t cfs_timer_deadline(struct cfs_timer *t) -{ - return ktimer_deadline(&t->t); -} - -void cfs_enter_debugger(void) -{ -#ifdef __DARWIN8__ - extern void Debugger(const char * reason); - Debugger("CFS"); -#else - extern void PE_enter_debugger(char *cause); - PE_enter_debugger("CFS"); -#endif -} - -int cfs_online_cpus(void) -{ - int activecpu; - size_t size; - -#ifdef __DARWIN8__ - size = sizeof(int); - sysctlbyname("hw.activecpu", &activecpu, &size, NULL, 0); - return activecpu; -#else - host_basic_info_data_t hinfo; - kern_return_t kret; - int count = HOST_BASIC_INFO_COUNT; -#define BSD_HOST 1 - kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count); - if (kret == KERN_SUCCESS) - return (hinfo.avail_cpus); - return(-EINVAL); -#endif -} - -int cfs_ncpus(void) -{ - int ncpu; - size_t size; - - size = sizeof(int); - - sysctlbyname("hw.ncpu", &ncpu, &size, NULL, 0); - 
return ncpu; -} diff --git a/lnet/libcfs/darwin/darwin-proc.c b/lnet/libcfs/darwin/darwin-proc.c deleted file mode 100644 index a001f5ba132b738fecb47cbde1c4018f28d15202..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-proc.c +++ /dev/null @@ -1,467 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/systm.h> -#include <sys/sysctl.h> -#include <sys/proc.h> -#include <sys/unistd.h> -#include <mach/mach_types.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - -#define LIBCFS_SYSCTL "libcfs" -#define LIBCFS_SYSCTL_SPRITE "sprite" -#define LIBCFS_SYSCTL_MAGIC 0xbabeface - -static struct libcfs_sysctl_sprite { - int ss_magic; - struct sysctl_oid_list *ss_link; -} libcfs_sysctl_sprite = { 0, NULL }; - -static cfs_sysctl_table_header_t *libcfs_table_header = NULL; -extern unsigned int libcfs_debug; -extern unsigned int libcfs_subsystem_debug; -extern unsigned int libcfs_printk; -extern unsigned int libcfs_console_ratelimit; -extern unsigned int libcfs_catastrophe; -extern atomic_t libcfs_kmemory; - -static int sysctl_debug_kernel SYSCTL_HANDLER_ARGS -{ -#error "Check me" - const int maxstr = 1024; - char *str; - int error; - - if (req->newptr == USER_ADDR_NULL) { - /* read request */ - return -EINVAL; - } - - /* write request */ - error = trace_allocate_string_buffer(&str, maxstr + 1); - if (error != 0) - return error; - - error = SYSCTL_IN(req, str, maxstr); - - /* NB str guaranteed terminted */ - if (error == 0) - error = tracefile_dump_all_pages(str); - - trace_free_string_buffer(str, maxstr + 1); - return error; -} - -static int sysctl_daemon_file SYSCTL_HANDLER_ARGS -{ -#error "Check me" - int error; - char *str; - - if (req->newptr == USER_ADDR_NULL) { - /* a read */ - tracefile_read_lock(); - - /* include terminating '\0' */ - error = SYSCTL_OUT(req, tracefile, strlen(tracefile) + 1); - - tracefile_read_unlock(); - return error; - } - - /* write request */ - error = trace_allocate_string_buffer(&str, TRACEFILE_NAME_SIZE); - if (error != 0) - return error; - - error = SYSCTL_IN(req, str, TRACEFILE_NAME_SIZE - 1); - - /* NB str guaranteed terminted */ - if (error == 0) - error = trace_daemon_command(str); - - trace_free_string_buffer(str, 
TRACEFILE_NAME_SIZE); - return error; -} - - -static int sysctl_debug_mb SYSCTL_HANDLER_ARGS -{ -#error "Check me" - long mb; - int error; - - if (req->newptr == USER_ADDR_NULL) { - /* read */ - mb = trace_get_debug_mb(); - error = SYSCTL_OUT(req, &mb, sizeof(mb)); - } else { - /* write */ - error = SYSCTL_IN(req, &mb, sizeof(mb)); - if (error == 0) - error = trace_set_debug_mb(mb); - } - - return error; -} - -/* - * sysctl table for lnet - */ - -SYSCTL_NODE (, OID_AUTO, lnet, CTLFLAG_RW, - 0, "lnet sysctl top"); - -SYSCTL_INT(_lnet, OID_AUTO, debug, - CTLTYPE_INT | CTLFLAG_RW , &libcfs_debug, - 0, "debug"); -SYSCTL_INT(_lnet, OID_AUTO, subsystem_debug, - CTLTYPE_INT | CTLFLAG_RW, &libcfs_subsystem_debug, - 0, "subsystem debug"); -SYSCTL_INT(_lnet, OID_AUTO, printk, - CTLTYPE_INT | CTLFLAG_RW, &libcfs_printk, - 0, "printk"); -SYSCTL_INT(_lnet, OID_AUTO, console_ratelimit, - CTLTYPE_INT | CTLFLAG_RW, &libcfs_console_ratelimit, - 0, "console_ratelimit"); -SYSCTL_STRING(_lnet, OID_AUTO, debug_path, - CTLTYPE_STRING | CTLFLAG_RW, debug_file_path, - 1024, "debug path"); -SYSCTL_INT(_lnet, OID_AUTO, memused, - CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_kmemory.counter, - 0, "memused"); -SYSCTL_INT(_lnet, OID_AUTO, catastrophe, - CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_catastrophe, - 0, "catastrophe"); - -#error "check me" -SYSCTL_PROC(_lnet, OID_AUTO, debug_kernel, - CTLTYPE_STRING | CTLFLAG_W, 0, - 0, &sysctl_debug_kernel, "A", "debug_kernel"); -SYSCTL_PROC(_lnet, OID_AUTO, daemon_file, - CTLTYPE_STRING | CTLFLAG_RW, 0, - 0, &sysctl_daemon_file, "A", "daemon_file"); -SYSCTL_PROC(_lnet, OID_AUTO, debug_mb, - CTLTYPE_INT | CTLFLAG_RW, 0, - 0, &sysctl_debug_mb, "L", "debug_mb"); - - -static cfs_sysctl_table_t top_table[] = { - &sysctl__lnet, - &sysctl__lnet_debug, - &sysctl__lnet_subsystem_debug, - &sysctl__lnet_printk, - &sysctl__lnet_console_ratelimit, - &sysctl__lnet_debug_path, - &sysctl__lnet_memused, - &sysctl__lnet_catastrophe, - &sysctl__lnet_debug_kernel, - 
&sysctl__lnet_daemon_file, - &sysctl__lnet_debug_mb, - NULL -}; - -/* - * Register sysctl table - */ -cfs_sysctl_table_header_t * -cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg) -{ - cfs_sysctl_table_t item; - int i = 0; - - while ((item = table[i++]) != NULL) - sysctl_register_oid(item); - return table; -} - -/* - * Unregister sysctl table - */ -void -cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table) { - int i = 0; - cfs_sysctl_table_t item; - - while ((item = table[i++]) != NULL) - sysctl_unregister_oid(item); - return; -} - -/* - * Allocate a sysctl oid. - */ -static struct sysctl_oid * -cfs_alloc_sysctl(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, void *arg1, int arg2, const char *fmt, - int (*handler) SYSCTL_HANDLER_ARGS) -{ - struct sysctl_oid *oid; - char *sname = NULL; - char *sfmt = NULL; - - if (strlen(name) + 1 > CTL_MAXNAME) { - printf("libcfs: sysctl name: %s is too long.\n", name); - return NULL; - } - oid = (struct sysctl_oid*)_MALLOC(sizeof(struct sysctl_oid), - M_TEMP, M_WAITOK | M_ZERO); - if (oid == NULL) - return NULL; - - sname = (char *)_MALLOC(sizeof(CTL_MAXNAME), - M_TEMP, M_WAITOK | M_ZERO); - if (sname == NULL) - goto error; - strcpy(sname, name); - - sfmt = (char *)_MALLOC(4, M_TEMP, M_WAITOK | M_ZERO); - if (sfmt == NULL) - goto error; - strcpy(sfmt, fmt); - - if (parent == NULL) - oid->oid_parent = &sysctl__children; - else - oid->oid_parent = parent; - oid->oid_number = nbr; - oid->oid_kind = access; - oid->oid_name = sname; - oid->oid_handler = handler; - oid->oid_fmt = sfmt; - - if ((access & CTLTYPE) == CTLTYPE_NODE){ - /* It's a sysctl node */ - struct sysctl_oid_list *link; - - link = (struct sysctl_oid_list *)_MALLOC(sizeof(struct sysctl_oid_list), - M_TEMP, M_WAITOK | M_ZERO); - if (link == NULL) - goto error; - oid->oid_arg1 = link; - oid->oid_arg2 = 0; - } else { - oid->oid_arg1 = arg1; - oid->oid_arg2 = arg2; - } - - return oid; -error: - if (sfmt != NULL) - _FREE(sfmt, 
M_TEMP); - if (sname != NULL) - _FREE(sname, M_TEMP); - if (oid != NULL) - _FREE(oid, M_TEMP); - return NULL; -} - -void cfs_free_sysctl(struct sysctl_oid *oid) -{ - if (oid->oid_name != NULL) - _FREE((void *)oid->oid_name, M_TEMP); - if (oid->oid_fmt != NULL) - _FREE((void *)oid->oid_fmt, M_TEMP); - if ((oid->oid_kind & CTLTYPE_NODE != 0) && oid->oid_arg1) - /* XXX Liang: need to assert the list is empty */ - _FREE(oid->oid_arg1, M_TEMP); - _FREE(oid, M_TEMP); -} - -#define CFS_SYSCTL_ISVALID ((libcfs_sysctl_sprite.ss_magic == LIBCFS_SYSCTL_MAGIC) && \ - (libcfs_sysctl_sprite.ss_link != NULL)) - -int -cfs_sysctl_isvalid(void) -{ - return CFS_SYSCTL_ISVALID; -} - -struct sysctl_oid * -cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int (*handler) SYSCTL_HANDLER_ARGS) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_NODE | access, name, - NULL, 0, "N", handler); -} - -struct sysctl_oid * -cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int *ptr, int val) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name, - ptr, val, "I", sysctl_handle_int); -} - -struct sysctl_oid * -cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int *ptr, int val) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name, - ptr, val, "L", sysctl_handle_long); -} - -struct sysctl_oid * -cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, char *ptr, int len) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_STRING | access, name, - ptr, len, "A", 
sysctl_handle_string); -} - -struct sysctl_oid * -cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, void *ptr, int size) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_OPAQUE | access, name, - ptr, size, "S", sysctl_handle_opaque); -} - -/* no proc in osx */ -cfs_proc_dir_entry_t * -cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent) -{ - cfs_proc_dir_entry_t *entry; - MALLOC(entry, cfs_proc_dir_entry_t *, sizeof(cfs_proc_dir_entry_t), M_TEMP, M_WAITOK|M_ZERO); - - return entry; -} - -void -cfs_free_proc_entry(cfs_proc_dir_entry_t *de){ - FREE(de, M_TEMP); - return; -}; - -void -cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry) -{ - cfs_free_proc_entry(entry); - return; -} - -int -insert_proc(void) -{ -#if 1 - if (!libcfs_table_header) - libcfs_table_header = cfs_register_sysctl_table(top_table, 0); -#endif - return 0; -} - -void -remove_proc(void) -{ -#if 1 - if (libcfs_table_header != NULL) - cfs_unregister_sysctl_table(libcfs_table_header); - libcfs_table_header = NULL; -#endif - return; -} - -int -cfs_sysctl_init(void) -{ - struct sysctl_oid *oid_root; - struct sysctl_oid *oid_sprite; - struct libcfs_sysctl_sprite *sprite; - size_t len; - int rc; - - len = sizeof(struct libcfs_sysctl_sprite); - rc = sysctlbyname("libcfs.sprite", - (void *)&libcfs_sysctl_sprite, &len, NULL, 0); - if (rc == 0) { - /* - * XXX Liang: assert (rc == 0 || rc == ENOENT) - * - * libcfs.sprite has been registered by previous - * loading of libcfs - */ - if (libcfs_sysctl_sprite.ss_magic != LIBCFS_SYSCTL_MAGIC) { - printf("libcfs: magic number of libcfs.sprite " - "is not right (%lx, %lx)\n", - libcfs_sysctl_sprite.ss_magic, - LIBCFS_SYSCTL_MAGIC); - return -1; - } - assert(libcfs_sysctl_sprite.ss_link != NULL); - printf("libcfs: registered libcfs.sprite found.\n"); - return 0; - } - oid_root = cfs_alloc_sysctl_node(NULL, 
OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, - LIBCFS_SYSCTL, 0); - if (oid_root == NULL) - return -1; - sysctl_register_oid(oid_root); - - sprite = (struct libcfs_sysctl_sprite *)_MALLOC(sizeof(struct libcfs_sysctl_sprite), - M_TEMP, M_WAITOK | M_ZERO); - if (sprite == NULL) { - sysctl_unregister_oid(oid_root); - cfs_free_sysctl(oid_root); - return -1; - } - sprite->ss_magic = LIBCFS_SYSCTL_MAGIC; - sprite->ss_link = (struct sysctl_oid_list *)oid_root->oid_arg1; - oid_sprite = cfs_alloc_sysctl_struct((struct sysctl_oid_list *)oid_root->oid_arg1, - OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, - LIBCFS_SYSCTL_SPRITE, sprite, - sizeof(struct libcfs_sysctl_sprite)); - if (oid_sprite == NULL) { - cfs_free_sysctl(oid_sprite); - sysctl_unregister_oid(oid_root); - cfs_free_sysctl(oid_root); - return -1; - } - sysctl_register_oid(oid_sprite); - - libcfs_sysctl_sprite.ss_magic = sprite->ss_magic; - libcfs_sysctl_sprite.ss_link = sprite->ss_link; - - return 0; -} - -void -cfs_sysctl_fini(void) -{ - libcfs_sysctl_sprite.ss_magic = 0; - libcfs_sysctl_sprite.ss_link = NULL; -} - diff --git a/lnet/libcfs/darwin/darwin-sync.c b/lnet/libcfs/darwin/darwin-sync.c deleted file mode 100644 index 8b752e33f838095bea5d1a7394b44e6d066f7610..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-sync.c +++ /dev/null @@ -1,1025 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -/* - * xnu_sync.c - * - * Created by nikita on Sun Jul 18 2004. - * - * XNU synchronization primitives. - */ - -/* - * This file contains very simplistic implementations of (saner) API for - * basic synchronization primitives: - * - * - spin-lock (kspin) - * - * - semaphore (ksem) - * - * - mutex (kmut) - * - * - condition variable (kcond) - * - * - wait-queue (ksleep_chan and ksleep_link) - * - * - timer (ktimer) - * - * A lot can be optimized here. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#ifdef __DARWIN8__ -# include <kern/locks.h> -#else -# include <mach/mach_types.h> -# include <sys/types.h> -# include <kern/simple_lock.h> -#endif - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#define SLASSERT(e) ON_SYNC_DEBUG(LASSERT(e)) - -#ifdef HAVE_GET_PREEMPTION_LEVEL -extern int get_preemption_level(void); -#else -#define get_preemption_level() (0) -#endif - -#if SMP -#ifdef __DARWIN8__ - -static lck_grp_t *cfs_lock_grp = NULL; -#warning "Verify definition of lck_spin_t hasn't been changed while building!" 
- -/* hw_lock_* are not exported by Darwin8 */ -static inline void xnu_spin_init(xnu_spin_t *s) -{ - SLASSERT(cfs_lock_grp != NULL); - //*s = lck_spin_alloc_init(cfs_lock_grp, LCK_ATTR_NULL); - lck_spin_init((lck_spin_t *)s, cfs_lock_grp, LCK_ATTR_NULL); -} - -static inline void xnu_spin_done(xnu_spin_t *s) -{ - SLASSERT(cfs_lock_grp != NULL); - //lck_spin_free(*s, cfs_lock_grp); - //*s = NULL; - lck_spin_destroy((lck_spin_t *)s, cfs_lock_grp); -} - -#define xnu_spin_lock(s) lck_spin_lock((lck_spin_t *)(s)) -#define xnu_spin_unlock(s) lck_spin_unlock((lck_spin_t *)(s)) - -#warning "Darwin8 does not export lck_spin_try_lock" -#define xnu_spin_try(s) (1) - -#else /* DARWIN8 */ -extern void hw_lock_init(hw_lock_t); -extern void hw_lock_lock(hw_lock_t); -extern void hw_lock_unlock(hw_lock_t); -extern unsigned int hw_lock_to(hw_lock_t, unsigned int); -extern unsigned int hw_lock_try(hw_lock_t); -extern unsigned int hw_lock_held(hw_lock_t); - -#define xnu_spin_init(s) hw_lock_init(s) -#define xnu_spin_done(s) do {} while (0) -#define xnu_spin_lock(s) hw_lock_lock(s) -#define xnu_spin_unlock(s) hw_lock_unlock(s) -#define xnu_spin_try(s) hw_lock_try(s) -#endif /* DARWIN8 */ - -#else /* SMP */ -#define xnu_spin_init(s) do {} while (0) -#define xnu_spin_done(s) do {} while (0) -#define xnu_spin_lock(s) do {} while (0) -#define xnu_spin_unlock(s) do {} while (0) -#define xnu_spin_try(s) (1) -#endif /* SMP */ - -/* - * Warning: low level libcfs debugging code (libcfs_debug_msg(), for - * example), uses spin-locks, so debugging output here may lead to nasty - * surprises. - * - * In uniprocessor version of spin-lock. Only checks. 
- */ - -void kspin_init(struct kspin *spin) -{ - SLASSERT(spin != NULL); - xnu_spin_init(&spin->lock); - ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC); - ON_SYNC_DEBUG(spin->owner = NULL); -} - -void kspin_done(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - SLASSERT(spin->owner == NULL); - xnu_spin_done(&spin->lock); -} - -void kspin_lock(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - SLASSERT(spin->owner != current_thread()); - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - xnu_spin_lock(&spin->lock); - SLASSERT(spin->owner == NULL); - ON_SYNC_DEBUG(spin->owner = current_thread()); -} - -void kspin_unlock(struct kspin *spin) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - SLASSERT(spin->owner == current_thread()); - ON_SYNC_DEBUG(spin->owner = NULL); - xnu_spin_unlock(&spin->lock); -} - -int kspin_trylock(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - - if (xnu_spin_try(&spin->lock)) { - SLASSERT(spin->owner == NULL); - ON_SYNC_DEBUG(spin->owner = current_thread()); - return 1; - } else - return 0; -} - -#if XNU_SYNC_DEBUG -int kspin_islocked(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - return spin->owner == current_thread(); -} - -int kspin_isnotlocked(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - return spin->owner != current_thread(); -} -#endif - -/* - * read/write spin-lock - */ -void krw_spin_init(struct krw_spin *rwspin) -{ - SLASSERT(rwspin != NULL); - - kspin_init(&rwspin->guard); - rwspin->count = 0; - ON_SYNC_DEBUG(rwspin->magic = KRW_SPIN_MAGIC); -} - -void krw_spin_done(struct krw_spin *rwspin) -{ - 
SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - SLASSERT(rwspin->count == 0); - kspin_done(&rwspin->guard); -} - -void krw_spin_down_r(struct krw_spin *rwspin) -{ - int i; - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - - kspin_lock(&rwspin->guard); - while(rwspin->count < 0) { - i = -1; - kspin_unlock(&rwspin->guard); - while (--i != 0 && rwspin->count < 0) - continue; - kspin_lock(&rwspin->guard); - } - ++ rwspin->count; - kspin_unlock(&rwspin->guard); -} - -void krw_spin_down_w(struct krw_spin *rwspin) -{ - int i; - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - - kspin_lock(&rwspin->guard); - while (rwspin->count != 0) { - i = -1; - kspin_unlock(&rwspin->guard); - while (--i != 0 && rwspin->count != 0) - continue; - kspin_lock(&rwspin->guard); - } - rwspin->count = -1; - kspin_unlock(&rwspin->guard); -} - -void krw_spin_up_r(struct krw_spin *rwspin) -{ - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - SLASSERT(rwspin->count > 0); - - kspin_lock(&rwspin->guard); - -- rwspin->count; - kspin_unlock(&rwspin->guard); -} - -void krw_spin_up_w(struct krw_spin *rwspin) -{ - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - SLASSERT(rwspin->count == -1); - - kspin_lock(&rwspin->guard); - rwspin->count = 0; - kspin_unlock(&rwspin->guard); -} - -/* - * semaphore - */ -#ifdef __DARWIN8__ - -#define xnu_waitq_init(q, a) do {} while (0) -#define xnu_waitq_done(q) do {} while (0) -#define xnu_waitq_wakeup_one(q, e, s) ({wakeup_one((void *)(e)); KERN_SUCCESS;}) -#define xnu_waitq_wakeup_all(q, e, s) ({wakeup((void *)(e)); KERN_SUCCESS;}) -#define xnu_waitq_assert_wait(q, e, s) assert_wait((e), s) - -#else /* DARWIN8 */ - -#define xnu_waitq_init(q, a) wait_queue_init((q), a) -#define xnu_waitq_done(q) do {} while (0) -#define xnu_waitq_wakeup_one(q, e, s) wait_queue_wakeup_one((q), (event_t)(e), s) -#define xnu_waitq_wakeup_all(q, e, s) 
wait_queue_wakeup_all((q), (event_t)(e), s) -#define xnu_waitq_assert_wait(q, e, s) wait_queue_assert_wait((q), (event_t)(e), s) - -#endif /* DARWIN8 */ -void ksem_init(struct ksem *sem, int value) -{ - SLASSERT(sem != NULL); - kspin_init(&sem->guard); - xnu_waitq_init(&sem->q, SYNC_POLICY_FIFO); - sem->value = value; - ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC); -} - -void ksem_done(struct ksem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - /* - * XXX nikita: cannot check that &sem->q is empty because - * wait_queue_empty() is Apple private API. - */ - kspin_done(&sem->guard); -} - -int ksem_up(struct ksem *sem, int value) -{ - int result; - - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - SLASSERT(value >= 0); - - kspin_lock(&sem->guard); - sem->value += value; - if (sem->value == 0) - result = xnu_waitq_wakeup_one(&sem->q, sem, - THREAD_AWAKENED); - else - result = xnu_waitq_wakeup_all(&sem->q, sem, - THREAD_AWAKENED); - kspin_unlock(&sem->guard); - SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); - return (result == KERN_SUCCESS) ? 
0 : 1; -} - -void ksem_down(struct ksem *sem, int value) -{ - int result; - - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - SLASSERT(value >= 0); - SLASSERT(get_preemption_level() == 0); - - kspin_lock(&sem->guard); - while (sem->value < value) { - result = xnu_waitq_assert_wait(&sem->q, sem, - THREAD_UNINT); - SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); - kspin_unlock(&sem->guard); - if (result == THREAD_WAITING) - thread_block(THREAD_CONTINUE_NULL); - kspin_lock(&sem->guard); - } - sem->value -= value; - kspin_unlock(&sem->guard); -} - -int ksem_trydown(struct ksem *sem, int value) -{ - int result; - - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - SLASSERT(value >= 0); - - kspin_lock(&sem->guard); - if (sem->value >= value) { - sem->value -= value; - result = 0; - } else - result = -EBUSY; - kspin_unlock(&sem->guard); - return result; -} - -void kmut_init(struct kmut *mut) -{ - SLASSERT(mut != NULL); - ksem_init(&mut->s, 1); - ON_SYNC_DEBUG(mut->magic = KMUT_MAGIC); - ON_SYNC_DEBUG(mut->owner = NULL); -} - -void kmut_done(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - SLASSERT(mut->owner == NULL); - ksem_done(&mut->s); -} - -void kmut_lock(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - SLASSERT(mut->owner != current_thread()); - SLASSERT(get_preemption_level() == 0); - - ksem_down(&mut->s, 1); - ON_SYNC_DEBUG(mut->owner = current_thread()); -} - -void kmut_unlock(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - SLASSERT(mut->owner == current_thread()); - - ON_SYNC_DEBUG(mut->owner = NULL); - ksem_up(&mut->s, 1); -} - -int kmut_trylock(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - return ksem_trydown(&mut->s, 1); -} - -#if XNU_SYNC_DEBUG -int kmut_islocked(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - return mut->owner 
== current_thread(); -} - -int kmut_isnotlocked(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - return mut->owner != current_thread(); -} -#endif - - -void kcond_init(struct kcond *cond) -{ - SLASSERT(cond != NULL); - - kspin_init(&cond->guard); - cond->waiters = NULL; - ON_SYNC_DEBUG(cond->magic = KCOND_MAGIC); -} - -void kcond_done(struct kcond *cond) -{ - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(cond->waiters == NULL); - kspin_done(&cond->guard); -} - -void kcond_wait(struct kcond *cond, struct kspin *lock) -{ - struct kcond_link link; - - SLASSERT(cond != NULL); - SLASSERT(lock != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(lock)); - - ksem_init(&link.sem, 0); - kspin_lock(&cond->guard); - link.next = cond->waiters; - cond->waiters = &link; - kspin_unlock(&cond->guard); - kspin_unlock(lock); - - ksem_down(&link.sem, 1); - - kspin_lock(&cond->guard); - kspin_unlock(&cond->guard); - kspin_lock(lock); -} - -void kcond_wait_guard(struct kcond *cond) -{ - struct kcond_link link; - - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(&cond->guard)); - - ksem_init(&link.sem, 0); - link.next = cond->waiters; - cond->waiters = &link; - kspin_unlock(&cond->guard); - - ksem_down(&link.sem, 1); - - kspin_lock(&cond->guard); -} - -void kcond_signal_guard(struct kcond *cond) -{ - struct kcond_link *link; - - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(&cond->guard)); - - link = cond->waiters; - if (link != NULL) { - cond->waiters = link->next; - ksem_up(&link->sem, 1); - } -} - -void kcond_signal(struct kcond *cond) -{ - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - - kspin_lock(&cond->guard); - kcond_signal_guard(cond); - kspin_unlock(&cond->guard); -} - -void kcond_broadcast_guard(struct kcond *cond) -{ - struct kcond_link *link; - - SLASSERT(cond != NULL); - 
SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(&cond->guard)); - - for (link = cond->waiters; link != NULL; link = link->next) - ksem_up(&link->sem, 1); - cond->waiters = NULL; -} - -void kcond_broadcast(struct kcond *cond) -{ - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - - kspin_lock(&cond->guard); - kcond_broadcast_guard(cond); - kspin_unlock(&cond->guard); -} - -void krw_sem_init(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - - kcond_init(&sem->cond); - sem->count = 0; - ON_SYNC_DEBUG(sem->magic = KRW_MAGIC); -} - -void krw_sem_done(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(sem->count == 0); - kcond_done(&sem->cond); -} - -void krw_sem_down_r(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(get_preemption_level() == 0); - - kspin_lock(&sem->cond.guard); - while (sem->count < 0) - kcond_wait_guard(&sem->cond); - ++ sem->count; - kspin_unlock(&sem->cond.guard); -} - -int krw_sem_down_r_try(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - - kspin_lock(&sem->cond.guard); - if (sem->count < 0) { - kspin_unlock(&sem->cond.guard); - return -EBUSY; - } - ++ sem->count; - kspin_unlock(&sem->cond.guard); - return 0; -} - -void krw_sem_down_w(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(get_preemption_level() == 0); - - kspin_lock(&sem->cond.guard); - while (sem->count != 0) - kcond_wait_guard(&sem->cond); - sem->count = -1; - kspin_unlock(&sem->cond.guard); -} - -int krw_sem_down_w_try(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - - kspin_lock(&sem->cond.guard); - if (sem->count != 0) { - kspin_unlock(&sem->cond.guard); - return -EBUSY; - } - sem->count = -1; - kspin_unlock(&sem->cond.guard); - return 0; -} - -void krw_sem_up_r(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic 
== KRW_MAGIC); - SLASSERT(sem->count > 0); - - kspin_lock(&sem->cond.guard); - -- sem->count; - if (sem->count == 0) - kcond_broadcast_guard(&sem->cond); - kspin_unlock(&sem->cond.guard); -} - -void krw_sem_up_w(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(sem->count == -1); - - kspin_lock(&sem->cond.guard); - sem->count = 0; - kspin_unlock(&sem->cond.guard); - kcond_broadcast(&sem->cond); -} - -void ksleep_chan_init(struct ksleep_chan *chan) -{ - SLASSERT(chan != NULL); - - kspin_init(&chan->guard); - CFS_INIT_LIST_HEAD(&chan->waiters); - ON_SYNC_DEBUG(chan->magic = KSLEEP_CHAN_MAGIC); -} - -void ksleep_chan_done(struct ksleep_chan *chan) -{ - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(list_empty(&chan->waiters)); - kspin_done(&chan->guard); -} - -void ksleep_link_init(struct ksleep_link *link) -{ - SLASSERT(link != NULL); - - CFS_INIT_LIST_HEAD(&link->linkage); - link->flags = 0; - link->event = current_thread(); - link->hits = 0; - link->forward = NULL; - ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC); -} - -void ksleep_link_done(struct ksleep_link *link) -{ - SLASSERT(link != NULL); - SLASSERT(link->magic == KSLEEP_LINK_MAGIC); - SLASSERT(list_empty(&link->linkage)); -} - -void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link) -{ - SLASSERT(chan != NULL); - SLASSERT(link != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(link->magic == KSLEEP_LINK_MAGIC); - SLASSERT(list_empty(&link->linkage)); - - kspin_lock(&chan->guard); - if (link->flags & KSLEEP_EXCLUSIVE) - list_add_tail(&link->linkage, &chan->waiters); - else - list_add(&link->linkage, &chan->waiters); - kspin_unlock(&chan->guard); -} - -void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link) -{ - SLASSERT(chan != NULL); - SLASSERT(link != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(link->magic == KSLEEP_LINK_MAGIC); - - kspin_lock(&chan->guard); - 
list_del_init(&link->linkage); - kspin_unlock(&chan->guard); -} - -static int has_hits(struct ksleep_chan *chan, event_t event) -{ - struct ksleep_link *scan; - - SLASSERT(kspin_islocked(&chan->guard)); - list_for_each_entry(scan, &chan->waiters, linkage) { - if (scan->event == event && scan->hits > 0) { - /* consume hit */ - -- scan->hits; - return 1; - } - } - return 0; -} - -static void add_hit(struct ksleep_chan *chan, event_t event) -{ - struct ksleep_link *scan; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - SLASSERT(kspin_islocked(&chan->guard)); - list_for_each_entry(scan, &chan->waiters, linkage) { - if (scan->event == event) { - ++ scan->hits; - break; - } - } -} - -void ksleep_wait(struct ksleep_chan *chan, cfs_task_state_t state) -{ - event_t event; - int result; - - ENTRY; - - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(get_preemption_level() == 0); - - event = current_thread(); - kspin_lock(&chan->guard); - if (!has_hits(chan, event)) { - result = assert_wait(event, state); - kspin_unlock(&chan->guard); - SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); - if (result == THREAD_WAITING) - thread_block(THREAD_CONTINUE_NULL); - } else - kspin_unlock(&chan->guard); - EXIT; -} - -/* - * Sleep on @chan for no longer than @timeout nano-seconds. Return remaining - * sleep time (non-zero only if thread was waken by a signal (not currently - * implemented), or waitq was already in the "signalled" state). 
- */ -int64_t ksleep_timedwait(struct ksleep_chan *chan, - cfs_task_state_t state, - __u64 timeout) -{ - event_t event; - - ENTRY; - - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(get_preemption_level() == 0); - - event = current_thread(); - kspin_lock(&chan->guard); - if (!has_hits(chan, event)) { - int result; - __u64 expire; - result = assert_wait(event, state); - if (timeout > 0) { - /* - * arm a timer. thread_set_timer()'s first argument is - * uint32_t, so we have to cook deadline ourselves. - */ - nanoseconds_to_absolutetime(timeout, &expire); - clock_absolutetime_interval_to_deadline(expire, &expire); - thread_set_timer_deadline(expire); - } - kspin_unlock(&chan->guard); - SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); - thread_cancel_timer(); - - if (result == THREAD_TIMED_OUT) - timeout = 0; - else { - __u64 now; - clock_get_uptime(&now); - if (expire > now) - absolutetime_to_nanoseconds(expire - now, &timeout); - else - timeout = 0; - } - } else { - /* just return timeout, because I've got event and don't need to wait */ - kspin_unlock(&chan->guard); - } - - RETURN(timeout); -} - -/* - * wake up single exclusive waiter (plus some arbitrary number of * - * non-exclusive) - */ -void ksleep_wake(struct ksleep_chan *chan) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - ksleep_wake_nr(chan, 1); -} - -/* - * wake up all waiters on @chan - */ -void ksleep_wake_all(struct ksleep_chan *chan) -{ - ENTRY; - ksleep_wake_nr(chan, 0); - EXIT; -} - -/* - * wakeup no more than @nr exclusive waiters from @chan, plus some arbitrary - * number of non-exclusive. If @nr is 0, wake up all waiters. 
- */ -void ksleep_wake_nr(struct ksleep_chan *chan, int nr) -{ - struct ksleep_link *scan; - int result; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - - kspin_lock(&chan->guard); - list_for_each_entry(scan, &chan->waiters, linkage) { - struct ksleep_chan *forward; - - forward = scan->forward; - if (forward != NULL) - kspin_lock(&forward->guard); - result = thread_wakeup(scan->event); - SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); - if (result == KERN_NOT_WAITING) { - ++ scan->hits; - if (forward != NULL) - add_hit(forward, scan->event); - } - if (forward != NULL) - kspin_unlock(&forward->guard); - if ((scan->flags & KSLEEP_EXCLUSIVE) && --nr == 0) - break; - } - kspin_unlock(&chan->guard); -} - -void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg) -{ - SLASSERT(t != NULL); - SLASSERT(func != NULL); - - kspin_init(&t->guard); - t->func = func; - t->arg = arg; - ON_SYNC_DEBUG(t->magic = KTIMER_MAGIC); -} - -void ktimer_done(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - kspin_done(&t->guard); - ON_SYNC_DEBUG(t->magic = 0); -} - -static void ktimer_actor(void *arg0, void *arg1) -{ - struct ktimer *t; - int armed; - - t = arg0; - /* - * this assumes that ktimer's are never freed. 
- */ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - /* - * call actual timer function - */ - kspin_lock(&t->guard); - armed = t->armed; - t->armed = 0; - kspin_unlock(&t->guard); - - if (armed) - t->func(t->arg); -} - -extern boolean_t thread_call_func_cancel(thread_call_func_t, thread_call_param_t, boolean_t); -extern void thread_call_func_delayed(thread_call_func_t, thread_call_param_t, __u64); - -static void ktimer_disarm_locked(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - thread_call_func_cancel(ktimer_actor, t, FALSE); -} - -/* - * Received deadline is nanoseconds, but time checked by - * thread_call is absolute time (The abstime unit is equal to - * the length of one bus cycle, so the duration is dependent - * on the bus speed of the computer), so we need to convert - * nanotime to abstime by nanoseconds_to_absolutetime(). - * - * Refer to _delayed_call_timer(...) - * - * if thread_call_func_delayed is not exported in the future, - * we can use timeout() or bsd_timeout() to replace it. - */ -void ktimer_arm(struct ktimer *t, u_int64_t deadline) -{ - cfs_time_t abstime; - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - kspin_lock(&t->guard); - ktimer_disarm_locked(t); - t->armed = 1; - nanoseconds_to_absolutetime(deadline, &abstime); - thread_call_func_delayed(ktimer_actor, t, deadline); - kspin_unlock(&t->guard); -} - -void ktimer_disarm(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - kspin_lock(&t->guard); - t->armed = 0; - ktimer_disarm_locked(t); - kspin_unlock(&t->guard); -} - -int ktimer_is_armed(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - /* - * no locking---result is only a hint anyway. 
- */ - return t->armed; -} - -u_int64_t ktimer_deadline(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - return t->deadline; -} - -void cfs_sync_init(void) -{ -#ifdef __DARWIN8__ - /* Initialize lock group */ - cfs_lock_grp = lck_grp_alloc_init("libcfs sync", LCK_GRP_ATTR_NULL); -#endif -} - -void cfs_sync_fini(void) -{ -#ifdef __DARWIN8__ - /* - * XXX Liang: destroy lock group. As we haven't called lock_done - * for all locks, cfs_lock_grp may not be freed by kernel(reference - * count > 1). - */ - lck_grp_free(cfs_lock_grp); - cfs_lock_grp = NULL; -#endif -} -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/darwin/darwin-tcpip.c b/lnet/libcfs/darwin/darwin-tcpip.c deleted file mode 100644 index c6609a78d6452f47a7e5e08c09e0c1285233795a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-tcpip.c +++ /dev/null @@ -1,1339 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ - -#include <mach/mach_types.h> -#include <sys/file.h> -#include <sys/mount.h> -#include <string.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/sockio.h> -#include <sys/protosw.h> -#include <net/if.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -static __inline__ struct sockaddr_in -blank_sin() -{ - struct sockaddr_in blank = { sizeof(struct sockaddr_in), AF_INET }; - return (blank); -} - -void -libcfs_ipif_free_enumeration (char **names, int n) -{ - int i; - - LASSERT (n > 0); - - for (i = 0; i < n && names[i] != NULL; i++) - LIBCFS_FREE(names[i], IFNAMSIZ); - - LIBCFS_FREE(names, n * sizeof(*names)); -} - -#ifdef __DARWIN8__ -/* - * Darwin 8.x - * - * No hack kernel structre, all using KPI. - */ - -int -libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct ifreq ifr; - socket_t so; - __u32 val; - int nob; - int rc; - - rc = -sock_socket(PF_INET, SOCK_STREAM, 0, - NULL, NULL, &so); - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return rc; - } - - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - rc = -EINVAL; - goto out; - } - - CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); - bzero(&ifr, sizeof(ifr)); - strcpy(ifr.ifr_name, name); - rc = -sock_ioctl (so, SIOCGIFFLAGS, &ifr); - - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - goto out; - } - - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - goto out; - } - - *up = 1; - - bzero(&ifr, sizeof(ifr)); - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - rc = -sock_ioctl(so, SIOCGIFADDR, &ifr); - - if (rc != 0) { - CERROR("Can't get IP address for interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in 
*)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - bzero(&ifr, sizeof(ifr)); - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - rc = -sock_ioctl(so, SIOCGIFNETMASK, &ifr); - - if (rc != 0) { - CERROR("Can't get netmask for interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *mask = ntohl(val); -out: - sock_close(so); - return rc; -} - -int -libcfs_ipif_enumerate (char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - socket_t so; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - - rc = -sock_socket(PF_INET, SOCK_STREAM, 0, - NULL, NULL, &so); - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (rc); - } - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { - toobig = 1; - nalloc = CFS_PAGE_SIZE/sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { - CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - -#if 1 - /* - * XXX Liang: - * sock_ioctl(..., SIOCGIFCONF, ...) is not supposed to be used in - * kernel space because it always try to copy result to userspace. - * So we can't get interfaces name by sock_ioctl(...,SIOCGIFCONF,...). - * I've created a bug for Apple, let's wait... 
- */ - nfound = 0; - for (i = 0; i < 16; i++) { - struct ifreq en; - bzero(&en, sizeof(en)); - snprintf(en.ifr_name, IFNAMSIZ, "en%d", i); - rc = -sock_ioctl (so, SIOCGIFFLAGS, &en); - if (rc != 0) - continue; - strcpy(ifr[nfound++].ifr_name, en.ifr_name); - } - -#else /* NOT in using now */ - rc = -sock_ioctl(so, SIOCGIFCONF, (caddr_t)&ifc); - - if (rc < 0) { - CERROR ("Error %d enumerating interfaces\n", rc); - goto out1; - } - - nfound = ifc.ifc_len/sizeof(*ifr); - LASSERT (nfound <= nalloc); -#endif - - if (nfound < nalloc || toobig) - break; - - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - nalloc *= 2; - } - if (nfound == 0) - goto out1; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out1; - } - /* NULL out all names[i] */ - memset (names, 0, nfound * sizeof(*names)); - - for (i = 0; i < nfound; i++) { - - nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - -out2: - if (rc < 0) - libcfs_ipif_free_enumeration(names, nfound); -out1: - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); -out0: - sock_close(so); - return rc; - -} - -/* - * Public entry of socket upcall. - * - * so_upcall can only be installed while create/accept of socket in - * Darwin 8.0, so we setup libcfs_sock_upcall() as upcall for all - * sockets in creat/accept, it will call upcall provided by user - * which can be setup after create/accept of socket. 
- */ -static void libcfs_sock_upcall(socket_t so, void* arg, int waitf) -{ - cfs_socket_t *sock; - - sock = (cfs_socket_t *)arg; - LASSERT(sock->s_magic == CFS_SOCK_MAGIC); - - if ((sock->s_flags & CFS_SOCK_UPCALL) != 0 && sock->s_upcall != NULL) - sock->s_upcall(so, sock->s_upcallarg, waitf); - return; -} - -void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg) -{ - sock->s_upcall = callback; - sock->s_upcallarg = arg; - sock->s_flags |= CFS_SOCK_UPCALL; - return; -} - -void libcfs_sock_reset_cb(cfs_socket_t *sock) -{ - sock->s_flags &= ~CFS_SOCK_UPCALL; - sock->s_upcall = NULL; - sock->s_upcallarg = NULL; - return; -} - -static int -libcfs_sock_create (cfs_socket_t **sockp, int *fatal, - __u32 local_ip, int local_port) -{ - struct sockaddr_in locaddr; - cfs_socket_t *sock; - int option; - int optlen; - int rc; - - /* All errors are fatal except bind failure if the port is in use */ - *fatal = 1; - - sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO); - if (!sock) { - CERROR("Can't allocate cfs_socket.\n"); - return -ENOMEM; - } - *sockp = sock; - sock->s_magic = CFS_SOCK_MAGIC; - - rc = -sock_socket(PF_INET, SOCK_STREAM, 0, - libcfs_sock_upcall, sock, &C2B_SOCK(sock)); - if (rc != 0) - goto out; - option = 1; - optlen = sizeof(option); - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, - SO_REUSEADDR, &option, optlen); - if (rc != 0) - goto out; - - /* can't specify a local port without a local IP */ - LASSERT (local_ip == 0 || local_port != 0); - - if (local_ip != 0 || local_port != 0) { - bzero (&locaddr, sizeof (locaddr)); - locaddr.sin_len = sizeof(struct sockaddr_in); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons (local_port); - locaddr.sin_addr.s_addr = (local_ip != 0) ? 
htonl(local_ip) : INADDR_ANY; - rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto out; - } - if (rc != 0) { - CERROR("Error trying to bind to port %d: %d\n", - local_port, rc); - goto out; - } - } - return 0; -out: - if (C2B_SOCK(sock) != NULL) - sock_close(C2B_SOCK(sock)); - FREE(sock, M_TEMP); - return rc; -} - -int -libcfs_sock_listen (cfs_socket_t **sockp, - __u32 local_ip, int local_port, int backlog) -{ - cfs_socket_t *sock; - int fatal; - int rc; - - rc = libcfs_sock_create(&sock, &fatal, local_ip, local_port); - if (rc != 0) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - - } - rc = -sock_listen(C2B_SOCK(sock), backlog); - if (rc == 0) { - *sockp = sock; - return 0; - } - - if (C2B_SOCK(sock) != NULL) - sock_close(C2B_SOCK(sock)); - FREE(sock, M_TEMP); - return rc; -} - -int -libcfs_sock_accept (cfs_socket_t **newsockp, cfs_socket_t *sock) -{ - cfs_socket_t *newsock; - int rc; - - newsock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO); - if (!newsock) { - CERROR("Can't allocate cfs_socket.\n"); - return -ENOMEM; - } - newsock->s_magic = CFS_SOCK_MAGIC; - /* - * thread will sleep in sock_accept by calling of msleep(), - * it can be interrupted because msleep() use PCATCH as argument. 
- */ - rc = -sock_accept(C2B_SOCK(sock), NULL, 0, 0, - libcfs_sock_upcall, newsock, &C2B_SOCK(newsock)); - if (rc) { - if (C2B_SOCK(newsock) != NULL) - sock_close(C2B_SOCK(newsock)); - FREE(newsock, M_TEMP); - if ((sock->s_flags & CFS_SOCK_DOWN) != 0) - /* shutdown by libcfs_sock_abort_accept(), fake - * error number for lnet_acceptor() */ - rc = -EAGAIN; - return rc; - } - *newsockp = newsock; - return 0; -} - -void -libcfs_sock_abort_accept (cfs_socket_t *sock) -{ - /* - * XXX Liang: - * - * we want to wakeup thread blocked by sock_accept, but we don't - * know the address where thread is sleeping on, so we cannot - * wakeup it directly. - * The thread slept in sock_accept will be waken up while: - * 1. interrupt by signal - * 2. new connection is coming (sonewconn) - * 3. disconnecting of the socket (soisconnected) - * - * Cause we can't send signal to a thread directly(no KPI), so the - * only thing can be done here is disconnect the socket (by - * sock_shutdown() or sth else? ). - * - * Shutdown request of socket with SHUT_WR or SHUT_RDWR will - * be issured to the protocol. 
- * sock_shutdown()->tcp_usr_shutdown()->tcp_usrclosed()-> - * tcp_close()->soisdisconnected(), it will wakeup thread by - * wakeup((caddr_t)&so->so_timeo); - */ - sock->s_flags |= CFS_SOCK_DOWN; - sock_shutdown(C2B_SOCK(sock), SHUT_RDWR); -} - -int -libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout) -{ - size_t rcvlen; - int rc; - cfs_duration_t to = cfs_time_seconds(timeout); - cfs_time_t then; - struct timeval tv; - - LASSERT(nob > 0); - - for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0, - }; - cfs_duration_usec(to, &tv); - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO, - &tv, sizeof(tv)); - if (rc != 0) { - CERROR("Can't set socket recv timeout " - "%ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - - then = cfs_time_current(); - rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen); - to -= cfs_time_current() - then; - - if (rc != 0 && rc != -EWOULDBLOCK) - return rc; - if (rcvlen == nob) - return 0; - - if (to <= 0) - return -EAGAIN; - - buffer = ((char *)buffer) + rcvlen; - nob -= rcvlen; - } - return 0; -} - -int -libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout) -{ - size_t sndlen; - int rc; - cfs_duration_t to = cfs_time_seconds(timeout); - cfs_time_t then; - struct timeval tv; - - LASSERT(nob > 0); - - for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = (timeout == 0) ? 
MSG_DONTWAIT : 0, - }; - - if (timeout != 0) { - cfs_duration_usec(to, &tv); - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO, - &tv, sizeof(tv)); - if (rc != 0) { - CERROR("Can't set socket send timeout " - "%ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - } - - then = cfs_time_current(); - rc = -sock_send(C2B_SOCK(sock), &msg, - ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen); - to -= cfs_time_current() - then; - - if (rc != 0 && rc != -EWOULDBLOCK) - return rc; - if (sndlen == nob) - return 0; - - if (to <= 0) - return -EAGAIN; - buffer = ((char *)buffer) + sndlen; - nob -= sndlen; - } - return 0; - -} - -int -libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port) -{ - struct sockaddr_in sin; - int rc; - - if (remote != 0) - /* Get remote address */ - rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); - else - /* Get local address */ - rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); - if (rc != 0) { - CERROR ("Error %d getting sock %s IP/port\n", - rc, remote ? 
"peer" : "local"); - return rc; - } - - if (ip != NULL) - *ip = ntohl (sin.sin_addr.s_addr); - - if (port != NULL) - *port = ntohs (sin.sin_port); - return 0; -} - -int -libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize) -{ - int option; - int rc; - - if (txbufsize != 0) { - option = txbufsize; - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof (option)); - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - return (rc); - } - } - - if (rxbufsize != 0) { - option = rxbufsize; - rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof (option)); - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return (rc); - } - } - return 0; -} - -int -libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize) -{ - int option; - int optlen; - int rc; - - if (txbufsize != NULL) { - optlen = sizeof(option); - rc = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, - (char *)&option, &optlen); - if (rc != 0) { - CERROR ("Can't get send buffer size: %d\n", rc); - return (rc); - } - *txbufsize = option; - } - - if (rxbufsize != NULL) { - optlen = sizeof(option); - rc = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, - (char *)&option, &optlen); - if (rc != 0) { - CERROR ("Can't get receive buffer size: %d\n", rc); - return (rc); - } - *rxbufsize = option; - } - return 0; -} - -void -libcfs_sock_release (cfs_socket_t *sock) -{ - if (C2B_SOCK(sock) != NULL) { - sock_shutdown(C2B_SOCK(sock), 2); - sock_close(C2B_SOCK(sock)); - } - FREE(sock, M_TEMP); -} - -int -libcfs_sock_connect (cfs_socket_t **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - cfs_socket_t *sock; - struct sockaddr_in srvaddr; - int rc; - - rc = libcfs_sock_create(&sock, fatal, local_ip, local_port); - if (rc != 0) - return rc; - - bzero(&srvaddr, sizeof(srvaddr)); - srvaddr.sin_len = sizeof(struct sockaddr_in); - 
srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(peer_port); - srvaddr.sin_addr.s_addr = htonl(peer_ip); - - rc = -sock_connect(C2B_SOCK(sock), (struct sockaddr *)&srvaddr, 0); - if (rc == 0) { - *sockp = sock; - return 0; - } - - *fatal = !(rc == -EADDRNOTAVAIL || rc == -EADDRINUSE); - CDEBUG(*fatal ? D_NETERROR : D_NET, - "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - - libcfs_sock_release(sock); - return rc; -} - -#else /* !__DARWIN8__ */ - -/* - * To use bigger buffer for socket: - * 1. Increase nmbclusters (Cannot increased by sysctl because it's ready only, so - * we must patch kernel). - * 2. Increase net.inet.tcp.reass.maxsegments - * 3. Increase net.inet.tcp.sendspace - * 4. Increase net.inet.tcp.recvspace - * 5. Increase kern.ipc.maxsockbuf - */ -#define KSOCK_MAX_BUF (1152*1024) - -int -libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct socket *so; - struct ifreq ifr; - int nob; - int rc; - __u32 val; - CFS_DECL_FUNNEL_DATA; - - CFS_NET_IN; - rc = socreate(PF_INET, &so, SOCK_STREAM, 0); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (-rc); - } - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - rc = -EINVAL; - goto out; - } - - CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); - strcpy(ifr.ifr_name, name); - CFS_NET_IN; - rc = ifioctl(so, SIOCGIFFLAGS, (caddr_t)&ifr, current_proc()); - CFS_NET_EX; - - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - goto out; - } - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - goto out; - } - - *up = 1; - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - CFS_NET_IN; - rc = ifioctl(so, SIOCGIFADDR, (caddr_t)&ifr, current_proc()); - CFS_NET_EX; - - if (rc != 0) { - CERROR("Can't get IP address for 
interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - CFS_NET_IN; - rc = ifioctl(so, SIOCGIFNETMASK, (caddr_t)&ifr, current_proc()); - CFS_NET_EX; - - if (rc != 0) { - CERROR("Can't get netmask for interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *mask = ntohl(val); -out: - CFS_NET_IN; - soclose(so); - CFS_NET_EX; - return -rc; -} - -int -libcfs_ipif_enumerate (char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - struct socket *so; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - CFS_DECL_FUNNEL_DATA; - - CFS_NET_IN; - rc = socreate(PF_INET, &so, SOCK_STREAM, 0); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (-rc); - } - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { - toobig = 1; - nalloc = CFS_PAGE_SIZE/sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { - CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - - CFS_NET_IN; - rc = -ifioctl(so, SIOCGIFCONF, (caddr_t)&ifc, current_proc()); - CFS_NET_EX; - - if (rc < 0) { - CERROR ("Error %d enumerating interfaces\n", rc); - goto out1; - } - - nfound = ifc.ifc_len/sizeof(*ifr); - LASSERT (nfound <= nalloc); - - if (nfound < nalloc || toobig) - break; - - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - nalloc *= 2; - } - if (nfound == 0) - goto out1; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out1; - } - /* NULL 
out all names[i] */ - memset (names, 0, nfound * sizeof(*names)); - - for (i = 0; i < nfound; i++) { - - nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - -out2: - if (rc < 0) - libcfs_ipif_free_enumeration(names, nfound); -out1: - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); -out0: - CFS_NET_IN; - soclose(so); - CFS_NET_EX; - return rc; -} - -static int -libcfs_sock_create (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port) -{ - struct sockaddr_in locaddr; - struct socket *so; - struct sockopt sopt; - int option; - int rc; - CFS_DECL_FUNNEL_DATA; - - *fatal = 1; - CFS_NET_IN; - rc = socreate(PF_INET, &so, SOCK_STREAM, 0); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (-rc); - } - - bzero(&sopt, sizeof sopt); - option = 1; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_REUSEADDR; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - CFS_NET_IN; - rc = sosetopt(so, &sopt); - if (rc != 0) { - CFS_NET_EX; - CERROR ("Can't set sock reuse address: %d\n", rc); - goto out; - } - /* can't specify a local port without a local IP */ - LASSERT (local_ip == 0 || local_port != 0); - - if (local_ip != 0 || local_port != 0) { - bzero (&locaddr, sizeof (locaddr)); - locaddr.sin_len = sizeof(struct sockaddr_in); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons (local_port); - locaddr.sin_addr.s_addr = (local_ip != 0) ? 
htonl(local_ip) : - INADDR_ANY; - - rc = sobind(so, (struct sockaddr *)&locaddr); - if (rc == EADDRINUSE) { - CFS_NET_EX; - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto out; - } - if (rc != 0) { - CFS_NET_EX; - CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n", - HIPQUAD(local_ip), rc); - goto out; - } - } - *sockp = so; - return 0; -out: - CFS_NET_IN; - soclose(so); - CFS_NET_EX; - return -rc; -} - -int -libcfs_sock_listen (struct socket **sockp, - __u32 local_ip, int local_port, int backlog) -{ - int fatal; - int rc; - CFS_DECL_FUNNEL_DATA; - - rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port); - if (rc != 0) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - } - CFS_NET_IN; - rc = solisten(*sockp, backlog); - CFS_NET_EX; - if (rc == 0) - return 0; - CERROR("Can't set listen backlog %d: %d\n", backlog, rc); - CFS_NET_IN; - soclose(*sockp); - CFS_NET_EX; - return -rc; -} - -int -libcfs_sock_accept (struct socket **newsockp, struct socket *sock) -{ - struct socket *so; - struct sockaddr *sa; - int error, s; - CFS_DECL_FUNNEL_DATA; - - CFS_NET_IN; - s = splnet(); - if ((sock->so_options & SO_ACCEPTCONN) == 0) { - splx(s); - CFS_NET_EX; - return (-EINVAL); - } - - if ((sock->so_state & SS_NBIO) && sock->so_comp.tqh_first == NULL) { - splx(s); - CFS_NET_EX; - return (-EWOULDBLOCK); - } - - error = 0; - while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) { - if (sock->so_state & SS_CANTRCVMORE) { - sock->so_error = ECONNABORTED; - break; - } - error = tsleep((caddr_t)&sock->so_timeo, PSOCK | PCATCH, - "accept", 0); - if (error) { - splx(s); - CFS_NET_EX; - return (-error); - } - } - if (sock->so_error) { - error = sock->so_error; - sock->so_error = 0; - splx(s); - CFS_NET_EX; - return (-error); - } - - /* - * At this point we know that there is at least one connection - * ready to be accepted. 
Remove it from the queue prior to - * allocating the file descriptor for it since falloc() may - * block allowing another process to accept the connection - * instead. - */ - so = TAILQ_FIRST(&sock->so_comp); - TAILQ_REMOVE(&sock->so_comp, so, so_list); - sock->so_qlen--; - - so->so_state &= ~SS_COMP; - so->so_head = NULL; - sa = 0; - (void) soaccept(so, &sa); - - *newsockp = so; - FREE(sa, M_SONAME); - splx(s); - CFS_NET_EX; - return (-error); -} - -void -libcfs_sock_abort_accept (struct socket *sock) -{ - wakeup(&sock->so_timeo); -} - -/* - * XXX Liang: timeout for write is not supported yet. - */ -int -libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - CFS_DECL_NET_DATA; - - while (nob > 0) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct uio suio = { - .uio_iov = &iov, - .uio_iovcnt = 1, - .uio_offset = 0, - .uio_resid = nob, - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_WRITE, - .uio_procp = NULL - }; - - CFS_NET_IN; - rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0); - CFS_NET_EX; - - if (rc != 0) { - if ( suio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ - rc == EWOULDBLOCK)) - rc = 0; - if ( rc != 0 ) - return -rc; - rc = nob - suio.uio_resid; - buffer = ((char *)buffer) + rc; - nob = suio.uio_resid; - continue; - } - break; - } - return (0); -} - -/* - * XXX Liang: timeout for read is not supported yet. 
- */ -int -libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - CFS_DECL_NET_DATA; - - while (nob > 0) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct uio ruio = { - .uio_iov = &iov, - .uio_iovcnt = 1, - .uio_offset = 0, - .uio_resid = nob, - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_READ, - .uio_procp = NULL - }; - - CFS_NET_IN; - rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0); - CFS_NET_EX; - - if (rc != 0) { - if ( ruio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ - rc == EWOULDBLOCK)) - rc = 0; - if (rc != 0) - return -rc; - rc = nob - ruio.uio_resid; - buffer = ((char *)buffer) + rc; - nob = ruio.uio_resid; - continue; - } - break; - } - return (0); -} - -int -libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize) -{ - struct sockopt sopt; - int rc = 0; - int option; - CFS_DECL_NET_DATA; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - - if (txbufsize != 0) { - option = txbufsize; - if (option > KSOCK_MAX_BUF) - option = KSOCK_MAX_BUF; - - sopt.sopt_name = SO_SNDBUF; - CFS_NET_IN; - rc = sosetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - - return -rc; - } - } - - if (rxbufsize != 0) { - option = rxbufsize; - sopt.sopt_name = SO_RCVBUF; - CFS_NET_IN; - rc = sosetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return -rc; - } - } - return 0; -} - -int -libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port) -{ - struct sockaddr_in *sin; - struct sockaddr *sa = NULL; - int rc; - CFS_DECL_NET_DATA; - - if (remote != 0) { - CFS_NET_IN; - rc = sock->so_proto->pr_usrreqs->pru_peeraddr(sock, &sa); - CFS_NET_EX; - - if (rc != 0) { - if (sa) FREE(sa, M_SONAME); - CERROR 
("Error %d getting sock peer IP\n", rc); - return -rc; - } - } else { - CFS_NET_IN; - rc = sock->so_proto->pr_usrreqs->pru_sockaddr(sock, &sa); - CFS_NET_EX; - if (rc != 0) { - if (sa) FREE(sa, M_SONAME); - CERROR ("Error %d getting sock local IP\n", rc); - return -rc; - } - } - if (sa != NULL) { - sin = (struct sockaddr_in *)sa; - if (ip != NULL) - *ip = ntohl (sin->sin_addr.s_addr); - if (port != NULL) - *port = ntohs (sin->sin_port); - if (sa) - FREE(sa, M_SONAME); - } - return 0; -} - -int -libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize) -{ - struct sockopt sopt; - int rc; - CFS_DECL_NET_DATA; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_GET; - sopt.sopt_level = SOL_SOCKET; - - if (txbufsize != NULL) { - sopt.sopt_val = txbufsize; - sopt.sopt_valsize = sizeof(*txbufsize); - sopt.sopt_name = SO_SNDBUF; - CFS_NET_IN; - rc = sogetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't get send buffer size: %d\n", rc); - return -rc; - } - } - - if (rxbufsize != NULL) { - sopt.sopt_val = rxbufsize; - sopt.sopt_valsize = sizeof(*rxbufsize); - sopt.sopt_name = SO_RCVBUF; - CFS_NET_IN; - rc = sogetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't get receive buffer size: %d\n", rc); - return -rc; - } - } - return 0; -} - -int -libcfs_sock_connect (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - struct sockaddr_in srvaddr; - struct socket *so; - int s; - int rc; - CFS_DECL_FUNNEL_DATA; - - rc = libcfs_sock_create(sockp, fatal, local_ip, local_port); - if (rc != 0) - return rc; - so = *sockp; - bzero(&srvaddr, sizeof(srvaddr)); - srvaddr.sin_len = sizeof(struct sockaddr_in); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons (peer_port); - srvaddr.sin_addr.s_addr = htonl (peer_ip); - - CFS_NET_IN; - rc = soconnect(so, (struct sockaddr *)&srvaddr); - if (rc != 0) { - CFS_NET_EX; - if (rc != EADDRNOTAVAIL && rc != EADDRINUSE) - CDEBUG(D_NETERROR, - 
"Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - goto out; - } - s = splnet(); - while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { - CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n"); - (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz); - } - if ((rc = so->so_error) != 0) { - so->so_error = 0; - splx(s); - CFS_NET_EX; - CDEBUG(D_NETERROR, - "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - goto out; - } - LASSERT(so->so_state & SS_ISCONNECTED); - splx(s); - CFS_NET_EX; - if (sockp) - *sockp = so; - return (0); -out: - CFS_NET_IN; - soshutdown(so, 2); - soclose(so); - CFS_NET_EX; - return (-rc); -} - -void -libcfs_sock_release (struct socket *sock) -{ - CFS_DECL_FUNNEL_DATA; - CFS_NET_IN; - soshutdown(sock, 0); - CFS_NET_EX; -} - -#endif diff --git a/lnet/libcfs/darwin/darwin-tracefile.c b/lnet/libcfs/darwin/darwin-tracefile.c deleted file mode 100644 index e672ad5b36a761b6c171aba94a87d7b08de6b332..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-tracefile.c +++ /dev/null @@ -1,191 +0,0 @@ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -/* - * We can't support smp tracefile currently. - * Everything is put on one cpu. - */ - -#define M_TCD_MAX_PAGES (128 * 1280) - -static long max_permit_mb = (64 * 1024); - -spinlock_t trace_cpu_serializer; - -/* - * thread currently executing tracefile code or NULL if none does. Used to - * detect recursive calls to libcfs_debug_msg(). 
- */ -static thread_t trace_owner = NULL; - -extern int get_preemption_level(void); -extern atomic_t tage_allocated; - -struct rw_semaphore tracefile_sem; - -int tracefile_init_arch() { - init_rwsem(&tracefile_sem); -#error "Todo: initialise per-cpu console buffers" - return 0; -} - -void tracefile_fini_arch() { -} - -void tracefile_read_lock() { - down_read(&tracefile_sem); -} - -void tracefile_read_unlock() { - up_read(&tracefile_sem); -} - -void tracefile_write_lock() { - down_write(&tracefile_sem); -} - -void tracefile_write_unlock() { - up_write(&tracefile_sem); -} - -char *trace_get_console_buffer(void) -{ -#error "todo: return a per-cpu/interrupt console buffer and disable pre-emption" -} - -void trace_put_console_buffer(char *buffer) -{ -#error "todo: re-enable pre-emption" -} - -struct trace_cpu_data *trace_get_tcd(void) -{ - struct trace_cpu_data *tcd; - int nr_pages; - struct list_head pages; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - /* - * debugging check for recursive call to libcfs_debug_msg() - */ - if (trace_owner == current_thread()) { - /* - * Cannot assert here. - */ - printk(KERN_EMERG "recursive call to %s", __FUNCTION__); - /* - * "The death of God left the angels in a strange position." - */ - cfs_enter_debugger(); - } - tcd = &trace_data[0].tcd; - CFS_INIT_LIST_HEAD(&pages); - if (get_preemption_level() == 0) - nr_pages = trace_refill_stock(tcd, CFS_ALLOC_STD, &pages); - else - nr_pages = 0; - spin_lock(&trace_cpu_serializer); - trace_owner = current_thread(); - tcd->tcd_cur_stock_pages += nr_pages; - list_splice(&pages, &tcd->tcd_stock_pages); - return tcd; -} - -extern void raw_page_death_row_clean(void); - -void __trace_put_tcd(struct trace_cpu_data *tcd) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - LASSERT(trace_owner == current_thread()); - trace_owner = NULL; - spin_unlock(&trace_cpu_serializer); - if (get_preemption_level() == 0) - /* purge all pending pages */ - raw_page_death_row_clean(); -} - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - /* XNU has global tcd, and all pages are owned by it */ - return 1; -} - -void -set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, - const int line, unsigned long stack) -{ - struct timeval tv; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - do_gettimeofday(&tv); - header->ph_subsys = subsys; - header->ph_mask = mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_sec = (__u32)tv.tv_sec; - header->ph_usec = tv.tv_usec; - header->ph_stack = stack; - header->ph_pid = cfs_curproc_pid(); - header->ph_line_num = line; - header->ph_extern_pid = (__u32)current_thread(); -} - -void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn) -{ - char *prefix = "Lustre", *ptype = KERN_INFO; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - if ((mask & D_EMERG) != 0) { - prefix = "LustreError"; - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = "LustreError"; - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = "Lustre"; - ptype = KERN_WARNING; - } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) { - prefix = "Lustre"; - ptype = KERN_INFO; - } - - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %.*s", ptype, prefix, len, buf); - } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %*s", - ptype, prefix, hdr->ph_pid, hdr->ph_extern_pid, - file, hdr->ph_line_num, fn, len, buf); - } -} - -int trace_max_debug_mb(void) -{ - return max_permit_mb; -} - -void -trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) -{ -#error "tbd" -} diff --git a/lnet/libcfs/darwin/darwin-utils.c b/lnet/libcfs/darwin/darwin-utils.c deleted file mode 100644 index cfd7a2d6a8b533bd9005a766094df8ae1f9f091e..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-utils.c +++ /dev/null @@ -1,578 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <mach/mach_types.h> -#include <string.h> -#include <sys/errno.h> -#include <sys/types.h> -#include <sys/fcntl.h> -#include <lnet/types.h> - -#include <libcfs/kp30.h> - -#ifndef isspace -inline int -isspace(char c) -{ - return (c == ' ' || c == '\t' || c == '\n' || c == '\12'); -} -#endif - -char * strpbrk(const char * cs,const char * ct) -{ - const char *sc1,*sc2; - - for( sc1 = cs; *sc1 != '\0'; ++sc1) { - for( sc2 = ct; *sc2 != '\0'; ++sc2) { - if (*sc1 == *sc2) - return (char *) sc1; - } - } - return NULL; -} - -char * strsep(char **s, const char *ct) -{ - char *sbegin = *s, *end; - - if (sbegin == NULL) - return NULL; - end = strpbrk(sbegin, ct); - if (end != NULL) - *end++ = '\0'; - *s = end; - - return sbegin; -} - -size_t strnlen(const char * s, size_t count) -{ - const char *sc; - - for (sc = s; count-- && *sc != '\0'; ++sc) - /* nothing */; - return sc - s; -} - -char * -strstr(const char *in, const char *str) -{ - char c; - size_t len; - - c = *str++; - if (!c) - return (char *) in; // Trivial empty string case - len = strlen(str); - do { - char sc; - do { - sc = *in++; - if (!sc) - return (char *) 0; - } while (sc != c); - } while (strncmp(in, str, len) != 0); - return (char *) (in - 1); -} - -char * -strrchr(const char *p, int ch) -{ - const char *end = p + strlen(p); - do { - if (*end == (char)ch) - return (char *)end; - } while (--end >= p); - return NULL; -} - -char * -ul2dstr(unsigned long address, char *buf, int len) -{ - char *pos = buf + len - 1; - - if (len <= 0 || !buf) - return NULL; - *pos = 0; - while (address) { - if (!--len) break; - *--pos = address % 10 + '0'; - address /= 10; - } - return pos; -} - -/* - * miscellaneous libcfs stuff - */ - -/* - * Convert server error code to client format. - * Linux errno.h. 
- */ - -/* obtained by - * - * cc /usr/include/asm/errno.h -E -dM | grep '#define E' | sort -n -k3,3 - * - */ -enum linux_errnos { - LINUX_EPERM = 1, - LINUX_ENOENT = 2, - LINUX_ESRCH = 3, - LINUX_EINTR = 4, - LINUX_EIO = 5, - LINUX_ENXIO = 6, - LINUX_E2BIG = 7, - LINUX_ENOEXEC = 8, - LINUX_EBADF = 9, - LINUX_ECHILD = 10, - LINUX_EAGAIN = 11, - LINUX_ENOMEM = 12, - LINUX_EACCES = 13, - LINUX_EFAULT = 14, - LINUX_ENOTBLK = 15, - LINUX_EBUSY = 16, - LINUX_EEXIST = 17, - LINUX_EXDEV = 18, - LINUX_ENODEV = 19, - LINUX_ENOTDIR = 20, - LINUX_EISDIR = 21, - LINUX_EINVAL = 22, - LINUX_ENFILE = 23, - LINUX_EMFILE = 24, - LINUX_ENOTTY = 25, - LINUX_ETXTBSY = 26, - LINUX_EFBIG = 27, - LINUX_ENOSPC = 28, - LINUX_ESPIPE = 29, - LINUX_EROFS = 30, - LINUX_EMLINK = 31, - LINUX_EPIPE = 32, - LINUX_EDOM = 33, - LINUX_ERANGE = 34, - LINUX_EDEADLK = 35, - LINUX_ENAMETOOLONG = 36, - LINUX_ENOLCK = 37, - LINUX_ENOSYS = 38, - LINUX_ENOTEMPTY = 39, - LINUX_ELOOP = 40, - LINUX_ENOMSG = 42, - LINUX_EIDRM = 43, - LINUX_ECHRNG = 44, - LINUX_EL2NSYNC = 45, - LINUX_EL3HLT = 46, - LINUX_EL3RST = 47, - LINUX_ELNRNG = 48, - LINUX_EUNATCH = 49, - LINUX_ENOCSI = 50, - LINUX_EL2HLT = 51, - LINUX_EBADE = 52, - LINUX_EBADR = 53, - LINUX_EXFULL = 54, - LINUX_ENOANO = 55, - LINUX_EBADRQC = 56, - LINUX_EBADSLT = 57, - LINUX_EBFONT = 59, - LINUX_ENOSTR = 60, - LINUX_ENODATA = 61, - LINUX_ETIME = 62, - LINUX_ENOSR = 63, - LINUX_ENONET = 64, - LINUX_ENOPKG = 65, - LINUX_EREMOTE = 66, - LINUX_ENOLINK = 67, - LINUX_EADV = 68, - LINUX_ESRMNT = 69, - LINUX_ECOMM = 70, - LINUX_EPROTO = 71, - LINUX_EMULTIHOP = 72, - LINUX_EDOTDOT = 73, - LINUX_EBADMSG = 74, - LINUX_EOVERFLOW = 75, - LINUX_ENOTUNIQ = 76, - LINUX_EBADFD = 77, - LINUX_EREMCHG = 78, - LINUX_ELIBACC = 79, - LINUX_ELIBBAD = 80, - LINUX_ELIBSCN = 81, - LINUX_ELIBMAX = 82, - LINUX_ELIBEXEC = 83, - LINUX_EILSEQ = 84, - LINUX_ERESTART = 85, - LINUX_ESTRPIPE = 86, - LINUX_EUSERS = 87, - LINUX_ENOTSOCK = 88, - LINUX_EDESTADDRREQ = 89, - LINUX_EMSGSIZE = 90, - 
LINUX_EPROTOTYPE = 91, - LINUX_ENOPROTOOPT = 92, - LINUX_EPROTONOSUPPORT = 93, - LINUX_ESOCKTNOSUPPORT = 94, - LINUX_EOPNOTSUPP = 95, - LINUX_EPFNOSUPPORT = 96, - LINUX_EAFNOSUPPORT = 97, - LINUX_EADDRINUSE = 98, - LINUX_EADDRNOTAVAIL = 99, - LINUX_ENETDOWN = 100, - LINUX_ENETUNREACH = 101, - LINUX_ENETRESET = 102, - LINUX_ECONNABORTED = 103, - LINUX_ECONNRESET = 104, - LINUX_ENOBUFS = 105, - LINUX_EISCONN = 106, - LINUX_ENOTCONN = 107, - LINUX_ESHUTDOWN = 108, - LINUX_ETOOMANYREFS = 109, - LINUX_ETIMEDOUT = 110, - LINUX_ECONNREFUSED = 111, - LINUX_EHOSTDOWN = 112, - LINUX_EHOSTUNREACH = 113, - LINUX_EALREADY = 114, - LINUX_EINPROGRESS = 115, - LINUX_ESTALE = 116, - LINUX_EUCLEAN = 117, - LINUX_ENOTNAM = 118, - LINUX_ENAVAIL = 119, - LINUX_EISNAM = 120, - LINUX_EREMOTEIO = 121, - LINUX_EDQUOT = 122, - LINUX_ENOMEDIUM = 123, - LINUX_EMEDIUMTYPE = 124, - - /* - * we don't need these, but for completeness.. - */ - LINUX_EDEADLOCK = LINUX_EDEADLK, - LINUX_EWOULDBLOCK = LINUX_EAGAIN -}; - -int convert_server_error(__u64 ecode) -{ - int sign; - int code; - - static int errno_xlate[] = { - /* success is always success */ - [0] = 0, - [LINUX_EPERM] = EPERM, - [LINUX_ENOENT] = ENOENT, - [LINUX_ESRCH] = ESRCH, - [LINUX_EINTR] = EINTR, - [LINUX_EIO] = EIO, - [LINUX_ENXIO] = ENXIO, - [LINUX_E2BIG] = E2BIG, - [LINUX_ENOEXEC] = ENOEXEC, - [LINUX_EBADF] = EBADF, - [LINUX_ECHILD] = ECHILD, - [LINUX_EAGAIN] = EAGAIN, - [LINUX_ENOMEM] = ENOMEM, - [LINUX_EACCES] = EACCES, - [LINUX_EFAULT] = EFAULT, - [LINUX_ENOTBLK] = ENOTBLK, - [LINUX_EBUSY] = EBUSY, - [LINUX_EEXIST] = EEXIST, - [LINUX_EXDEV] = EXDEV, - [LINUX_ENODEV] = ENODEV, - [LINUX_ENOTDIR] = ENOTDIR, - [LINUX_EISDIR] = EISDIR, - [LINUX_EINVAL] = EINVAL, - [LINUX_ENFILE] = ENFILE, - [LINUX_EMFILE] = EMFILE, - [LINUX_ENOTTY] = ENOTTY, - [LINUX_ETXTBSY] = ETXTBSY, - [LINUX_EFBIG] = EFBIG, - [LINUX_ENOSPC] = ENOSPC, - [LINUX_ESPIPE] = ESPIPE, - [LINUX_EROFS] = EROFS, - [LINUX_EMLINK] = EMLINK, - [LINUX_EPIPE] = EPIPE, - 
[LINUX_EDOM] = EDOM, - [LINUX_ERANGE] = ERANGE, - [LINUX_EDEADLK] = EDEADLK, - [LINUX_ENAMETOOLONG] = ENAMETOOLONG, - [LINUX_ENOLCK] = ENOLCK, - [LINUX_ENOSYS] = ENOSYS, - [LINUX_ENOTEMPTY] = ENOTEMPTY, - [LINUX_ELOOP] = ELOOP, - [LINUX_ENOMSG] = ENOMSG, - [LINUX_EIDRM] = EIDRM, - [LINUX_ECHRNG] = EINVAL /* ECHRNG */, - [LINUX_EL2NSYNC] = EINVAL /* EL2NSYNC */, - [LINUX_EL3HLT] = EINVAL /* EL3HLT */, - [LINUX_EL3RST] = EINVAL /* EL3RST */, - [LINUX_ELNRNG] = EINVAL /* ELNRNG */, - [LINUX_EUNATCH] = EINVAL /* EUNATCH */, - [LINUX_ENOCSI] = EINVAL /* ENOCSI */, - [LINUX_EL2HLT] = EINVAL /* EL2HLT */, - [LINUX_EBADE] = EINVAL /* EBADE */, - [LINUX_EBADR] = EBADRPC, - [LINUX_EXFULL] = EINVAL /* EXFULL */, - [LINUX_ENOANO] = EINVAL /* ENOANO */, - [LINUX_EBADRQC] = EINVAL /* EBADRQC */, - [LINUX_EBADSLT] = EINVAL /* EBADSLT */, - [LINUX_EBFONT] = EINVAL /* EBFONT */, - [LINUX_ENOSTR] = EINVAL /* ENOSTR */, - [LINUX_ENODATA] = EINVAL /* ENODATA */, - [LINUX_ETIME] = EINVAL /* ETIME */, - [LINUX_ENOSR] = EINVAL /* ENOSR */, - [LINUX_ENONET] = EINVAL /* ENONET */, - [LINUX_ENOPKG] = EINVAL /* ENOPKG */, - [LINUX_EREMOTE] = EREMOTE, - [LINUX_ENOLINK] = EINVAL /* ENOLINK */, - [LINUX_EADV] = EINVAL /* EADV */, - [LINUX_ESRMNT] = EINVAL /* ESRMNT */, - [LINUX_ECOMM] = EINVAL /* ECOMM */, - [LINUX_EPROTO] = EPROTOTYPE, - [LINUX_EMULTIHOP] = EINVAL /* EMULTIHOP */, - [LINUX_EDOTDOT] = EINVAL /* EDOTDOT */, - [LINUX_EBADMSG] = EINVAL /* EBADMSG */, - [LINUX_EOVERFLOW] = EOVERFLOW, - [LINUX_ENOTUNIQ] = EINVAL /* ENOTUNIQ */, - [LINUX_EBADFD] = EINVAL /* EBADFD */, - [LINUX_EREMCHG] = EINVAL /* EREMCHG */, - [LINUX_ELIBACC] = EINVAL /* ELIBACC */, - [LINUX_ELIBBAD] = EINVAL /* ELIBBAD */, - [LINUX_ELIBSCN] = EINVAL /* ELIBSCN */, - [LINUX_ELIBMAX] = EINVAL /* ELIBMAX */, - [LINUX_ELIBEXEC] = EINVAL /* ELIBEXEC */, - [LINUX_EILSEQ] = EILSEQ, - [LINUX_ERESTART] = EINVAL /* because ERESTART is - * negative in XNU */, - [LINUX_ESTRPIPE] = EINVAL /* ESTRPIPE */, - [LINUX_EUSERS] = 
EUSERS, - [LINUX_ENOTSOCK] = ENOTSOCK, - [LINUX_EDESTADDRREQ] = EDESTADDRREQ, - [LINUX_EMSGSIZE] = EMSGSIZE, - [LINUX_EPROTOTYPE] = EPROTOTYPE, - [LINUX_ENOPROTOOPT] = ENOPROTOOPT, - [LINUX_EPROTONOSUPPORT] = EPROTONOSUPPORT, - [LINUX_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT, - [LINUX_EOPNOTSUPP] = EOPNOTSUPP, - [LINUX_EPFNOSUPPORT] = EPFNOSUPPORT, - [LINUX_EAFNOSUPPORT] = EAFNOSUPPORT, - [LINUX_EADDRINUSE] = EADDRINUSE, - [LINUX_EADDRNOTAVAIL] = EADDRNOTAVAIL, - [LINUX_ENETDOWN] = ENETDOWN, - [LINUX_ENETUNREACH] = ENETUNREACH, - [LINUX_ENETRESET] = ENETRESET, - [LINUX_ECONNABORTED] = ECONNABORTED, - [LINUX_ECONNRESET] = ECONNRESET, - [LINUX_ENOBUFS] = ENOBUFS, - [LINUX_EISCONN] = EISCONN, - [LINUX_ENOTCONN] = ENOTCONN, - [LINUX_ESHUTDOWN] = ESHUTDOWN, - [LINUX_ETOOMANYREFS] = ETOOMANYREFS, - [LINUX_ETIMEDOUT] = ETIMEDOUT, - [LINUX_ECONNREFUSED] = ECONNREFUSED, - [LINUX_EHOSTDOWN] = EHOSTDOWN, - [LINUX_EHOSTUNREACH] = EHOSTUNREACH, - [LINUX_EALREADY] = EALREADY, - [LINUX_EINPROGRESS] = EINPROGRESS, - [LINUX_ESTALE] = ESTALE, - [LINUX_EUCLEAN] = EINVAL /* EUCLEAN */, - [LINUX_ENOTNAM] = EINVAL /* ENOTNAM */, - [LINUX_ENAVAIL] = EINVAL /* ENAVAIL */, - [LINUX_EISNAM] = EINVAL /* EISNAM */, - [LINUX_EREMOTEIO] = EINVAL /* EREMOTEIO */, - [LINUX_EDQUOT] = EDQUOT, - [LINUX_ENOMEDIUM] = EINVAL /* ENOMEDIUM */, - [LINUX_EMEDIUMTYPE] = EINVAL /* EMEDIUMTYPE */, - }; - code = (int)ecode; - if (code >= 0) { - sign = +1; - } else { - sign = -1; - code = -code; - } - if (code < (sizeof errno_xlate) / (sizeof errno_xlate[0])) { - code = errno_xlate[code]; - LASSERT(code >= 0); - } - return sign * code; -} - -enum { - LINUX_O_RDONLY = 00, - LINUX_O_WRONLY = 01, - LINUX_O_RDWR = 02, - LINUX_O_CREAT = 0100, - LINUX_O_EXCL = 0200, - LINUX_O_NOCTTY = 0400, - LINUX_O_TRUNC = 01000, - LINUX_O_APPEND = 02000, - LINUX_O_NONBLOCK = 04000, - LINUX_O_NDELAY = LINUX_O_NONBLOCK, - LINUX_O_SYNC = 010000, - LINUX_O_FSYNC = LINUX_O_SYNC, - LINUX_O_ASYNC = 020000, - LINUX_O_DIRECT = 040000, - 
LINUX_O_NOFOLLOW = 0400000 -}; - -static inline void obit_convert(int *cflag, int *sflag, - unsigned cmask, unsigned smask) -{ - if (*cflag & cmask != 0) { - *sflag |= smask; - *cflag &= ~cmask; - } -} - -/* - * convert <fcntl.h> flag from XNU client to Linux _i386_ server. - */ -int convert_client_oflag(int cflag, int *result) -{ - int sflag = 0; - - cflag = 0; - obit_convert(&cflag, &sflag, O_RDONLY, LINUX_O_RDONLY); - obit_convert(&cflag, &sflag, O_WRONLY, LINUX_O_WRONLY); - obit_convert(&cflag, &sflag, O_RDWR, LINUX_O_RDWR); - obit_convert(&cflag, &sflag, O_NONBLOCK, LINUX_O_NONBLOCK); - obit_convert(&cflag, &sflag, O_APPEND, LINUX_O_APPEND); - obit_convert(&cflag, &sflag, O_ASYNC, LINUX_O_ASYNC); - obit_convert(&cflag, &sflag, O_FSYNC, LINUX_O_FSYNC); - obit_convert(&cflag, &sflag, O_NOFOLLOW, LINUX_O_NOFOLLOW); - obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); - obit_convert(&cflag, &sflag, O_TRUNC, LINUX_O_TRUNC); - obit_convert(&cflag, &sflag, O_EXCL, LINUX_O_EXCL); - obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); - obit_convert(&cflag, &sflag, O_NDELAY, LINUX_O_NDELAY); - obit_convert(&cflag, &sflag, O_NOCTTY, LINUX_O_NOCTTY); - /* - * Some more obscure BSD flags have no Linux counterparts: - * - * O_SHLOCK 0x0010 - * O_EXLOCK 0x0020 - * O_EVTONLY 0x8000 - * O_POPUP 0x80000000 - * O_ALERT 0x20000000 - */ - if (cflag == 0) { - *result = sflag; - return 0; - } else - return -EINVAL; -} - -#ifdef __DARWIN8__ -#else /* !__DARWIN8__ */ -extern int unix_syscall(); -extern int unix_syscall_return(); - -extern int ktrsysret(); -extern int ktrace(); - -extern int ast_taken(); -extern int ast_check(); - -extern int trap(); -extern int syscall_trace(); - -static int is_addr_in_range(void *addr, void *start, void *end) -{ - return start <= addr && addr <= end; -} - -extern void cfs_thread_agent (void); - -static int is_last_frame(void *addr) -{ - if (addr == NULL) - return 1; - else if (is_addr_in_range(addr, unix_syscall, unix_syscall_return)) - return 
1; - else if (is_addr_in_range(addr, ktrsysret, ktrace)) - return 1; - else if (is_addr_in_range(addr, ast_taken, ast_check)) - return 1; - else if (is_addr_in_range(addr, trap, syscall_trace)) - return 1; - else if (is_addr_in_range(addr, cfs_thread_agent, cfs_kernel_thread)) - return 1; - else - return 0; -} - -static void *get_frame(int i) -{ - void *result; - -#define CASE(i) case (i): result = __builtin_return_address(i); break - switch (i + 1) { - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - CASE(17); - CASE(18); - CASE(19); - CASE(20); - default: - panic("impossible frame number: %d\n", i); - result = NULL; - } - return result; -} - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{ - int i; - - memset(trace, 0, sizeof *trace); - for (i = 0; i < sizeof_array(trace->frame); ++ i) { - void *addr; - - addr = get_frame(i); - trace->frame[i] = addr; - if (is_last_frame(addr)) - break; - } -} - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - if (0 <= frame_no && frame_no < sizeof_array(trace->frame)) - return trace->frame[frame_no]; - else - return NULL; -} -#endif /* !__DARWIN8__ */ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c deleted file mode 100644 index 9810bdb900ed85f21333af7ae003596dfd3b739a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/debug.c +++ /dev/null @@ -1,839 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -# define DEBUG_SUBSYSTEM S_LNET - -#include <stdarg.h> -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -static char debug_file_name[1024]; - -#ifdef __KERNEL__ -unsigned int libcfs_subsystem_debug = ~0; -CFS_MODULE_PARM(libcfs_subsystem_debug, "i", int, 0644, - "Lustre kernel debug subsystem mask"); -EXPORT_SYMBOL(libcfs_subsystem_debug); - -unsigned int libcfs_debug = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE | - D_NETERROR | D_HA | D_CONFIG | D_IOCTL); -CFS_MODULE_PARM(libcfs_debug, "i", int, 0644, - "Lustre kernel debug mask"); -EXPORT_SYMBOL(libcfs_debug); - -int libcfs_debug_mb = -1; -CFS_MODULE_PARM(libcfs_debug_mb, "i", int, 0644, - "Total debug buffer size."); -EXPORT_SYMBOL(libcfs_debug_mb); - -unsigned int libcfs_printk = D_CANTMASK; -CFS_MODULE_PARM(libcfs_printk, "i", uint, 0644, - "Lustre kernel debug console mask"); -EXPORT_SYMBOL(libcfs_printk); - -unsigned int libcfs_console_ratelimit = 1; -CFS_MODULE_PARM(libcfs_console_ratelimit, "i", uint, 0644, - "Lustre kernel debug console ratelimit (0 to disable)"); -EXPORT_SYMBOL(libcfs_console_ratelimit); - -cfs_duration_t libcfs_console_max_delay; -CFS_MODULE_PARM(libcfs_console_max_delay, "l", ulong, 0644, - "Lustre kernel debug console max delay (jiffies)"); -EXPORT_SYMBOL(libcfs_console_max_delay); - -cfs_duration_t libcfs_console_min_delay; -CFS_MODULE_PARM(libcfs_console_min_delay, "l", ulong, 0644, - "Lustre kernel debug console min delay (jiffies)"); 
-EXPORT_SYMBOL(libcfs_console_min_delay); - -unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; -CFS_MODULE_PARM(libcfs_console_backoff, "i", uint, 0644, - "Lustre kernel debug console backoff factor"); -EXPORT_SYMBOL(libcfs_console_backoff); - -unsigned int libcfs_debug_binary = 1; -EXPORT_SYMBOL(libcfs_debug_binary); - -unsigned int libcfs_stack; -EXPORT_SYMBOL(libcfs_stack); - -unsigned int portal_enter_debugger; -EXPORT_SYMBOL(portal_enter_debugger); - -unsigned int libcfs_catastrophe; -EXPORT_SYMBOL(libcfs_catastrophe); - -unsigned int libcfs_panic_on_lbug = 0; -CFS_MODULE_PARM(libcfs_panic_on_lbug, "i", uint, 0644, - "Lustre kernel panic on LBUG"); -EXPORT_SYMBOL(libcfs_panic_on_lbug); - -atomic_t libcfs_kmemory = ATOMIC_INIT(0); -EXPORT_SYMBOL(libcfs_kmemory); - -static cfs_waitq_t debug_ctlwq; - -#ifdef __arch_um__ -char debug_file_path[1024] = "/r/tmp/lustre-log"; -#else -char debug_file_path[1024] = "/tmp/lustre-log"; -#endif -CFS_MODULE_PARM(debug_file_path, "s", charp, 0644, - "Path for dumping debug logs, " - "set 'NONE' to prevent log dumping"); - -int libcfs_panic_in_progress; - -/* libcfs_debug_token2mask() expects the returned - * string in lower-case */ -const char * -libcfs_debug_subsys2str(int subsys) -{ - switch (subsys) { - default: - return NULL; - case S_UNDEFINED: - return "undefined"; - case S_MDC: - return "mdc"; - case S_MDS: - return "mds"; - case S_OSC: - return "osc"; - case S_OST: - return "ost"; - case S_CLASS: - return "class"; - case S_LOG: - return "log"; - case S_LLITE: - return "llite"; - case S_RPC: - return "rpc"; - case S_LNET: - return "lnet"; - case S_LND: - return "lnd"; - case S_PINGER: - return "pinger"; - case S_FILTER: - return "filter"; - case S_ECHO: - return "echo"; - case S_LDLM: - return "ldlm"; - case S_LOV: - return "lov"; - case S_LMV: - return "lmv"; - case S_SEC: - return "sec"; - case S_GSS: - return "gss"; - case S_MGC: - return "mgc"; - case S_MGS: - return "mgs"; - case S_FID: - return "fid"; 
- case S_FLD: - return "fld"; - } -} - -/* libcfs_debug_token2mask() expects the returned - * string in lower-case */ -const char * -libcfs_debug_dbg2str(int debug) -{ - switch (debug) { - default: - return NULL; - case D_TRACE: - return "trace"; - case D_INODE: - return "inode"; - case D_SUPER: - return "super"; - case D_EXT2: - return "ext2"; - case D_MALLOC: - return "malloc"; - case D_CACHE: - return "cache"; - case D_INFO: - return "info"; - case D_IOCTL: - return "ioctl"; - case D_NETERROR: - return "neterror"; - case D_NET: - return "net"; - case D_WARNING: - return "warning"; - case D_BUFFS: - return "buffs"; - case D_OTHER: - return "other"; - case D_DENTRY: - return "dentry"; - case D_NETTRACE: - return "nettrace"; - case D_PAGE: - return "page"; - case D_DLMTRACE: - return "dlmtrace"; - case D_ERROR: - return "error"; - case D_EMERG: - return "emerg"; - case D_HA: - return "ha"; - case D_RPCTRACE: - return "rpctrace"; - case D_VFSTRACE: - return "vfstrace"; - case D_READA: - return "reada"; - case D_MMAP: - return "mmap"; - case D_CONFIG: - return "config"; - case D_CONSOLE: - return "console"; - case D_QUOTA: - return "quota"; - case D_SEC: - return "sec"; - } -} - -int -libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int len = 0; - const char *token; - int bit; - int i; - - if (mask == 0) { /* "0" */ - if (size > 0) - str[0] = '0'; - len = 1; - } else { /* space-separated tokens */ - for (i = 0; i < 32; i++) { - bit = 1 << i; - - if ((mask & bit) == 0) - continue; - - token = fn(bit); - if (token == NULL) /* unused bit */ - continue; - - if (len > 0) { /* separator? 
*/ - if (len < size) - str[len] = ' '; - len++; - } - - while (*token != 0) { - if (len < size) - str[len] = *token; - token++; - len++; - } - } - } - - /* terminate 'str' */ - if (len < size) - str[len] = 0; - else - str[size - 1] = 0; - - return len; -} - -int -libcfs_debug_token2mask(int *mask, const char *str, int len, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int i; - int j; - int bit; - const char *token; - - /* match against known tokens */ - for (i = 0; i < 32; i++) { - bit = 1 << i; - - token = fn(bit); - if (token == NULL) /* unused? */ - continue; - - /* strcasecmp */ - for (j = 0; ; j++) { - if (j == len) { /* end of token */ - if (token[j] == 0) { - *mask = bit; - return 0; - } - break; - } - - if (token[j] == 0) - break; - - if (str[j] == token[j]) - continue; - - if (str[j] < 'A' || 'Z' < str[j]) - break; - - if (str[j] - 'A' + 'a' != token[j]) - break; - } - } - - return -EINVAL; /* no match */ -} - -int -libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) -{ - int m = 0; - char op = 0; - int matched; - int n; - int t; - - /* Allow a number for backwards compatibility */ - - for (n = strlen(str); n > 0; n--) - if (!isspace(str[n-1])) - break; - matched = n; - - if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 && - matched == n) { - *mask = m; - return 0; - } - - /* <str> must be a list of debug tokens or numbers separated by - * whitespace and optionally an operator ('+' or '-'). If an operator - * appears first in <str>, '*mask' is used as the starting point - * (relative), otherwise 0 is used (absolute). An operator applies to - * all following tokens up to the next operator. 
*/ - - matched = 0; - while (*str != 0) { - while (isspace(*str)) /* skip whitespace */ - str++; - - if (*str == 0) - break; - - if (*str == '+' || *str == '-') { - op = *str++; - - /* op on first token == relative */ - if (!matched) - m = *mask; - - while (isspace(*str)) /* skip whitespace */ - str++; - - if (*str == 0) /* trailing op */ - return -EINVAL; - } - - /* find token length */ - for (n = 0; str[n] != 0 && !isspace(str[n]); n++); - - /* match token */ - if (libcfs_debug_token2mask(&t, str, n, is_subsys) != 0) - return -EINVAL; - - matched = 1; - if (op == '-') - m &= ~t; - else - m |= t; - - str += n; - } - - if (!matched) - return -EINVAL; - - *mask = m; - return 0; -} - -void libcfs_debug_dumplog_internal(void *arg) -{ - CFS_DECL_JOURNAL_DATA; - - CFS_PUSH_JOURNAL; - - if (strncmp(debug_file_path, "NONE", 4) != 0) { - snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(), - (long)arg); - printk(KERN_ALERT "LustreError: dumping log to %s\n", - debug_file_name); - tracefile_dump_all_pages(debug_file_name); - } - CFS_POP_JOURNAL; -} - -int libcfs_debug_dumplog_thread(void *arg) -{ - cfs_daemonize(""); - libcfs_debug_dumplog_internal(arg); - cfs_waitq_signal(&debug_ctlwq); - return 0; -} - -void libcfs_debug_dumplog(void) -{ - int rc; - cfs_waitlink_t wait; - ENTRY; - - /* we're being careful to ensure that the kernel thread is - * able to set our state to running as it exits before we - * get to schedule() */ - cfs_waitlink_init(&wait); - set_current_state(TASK_INTERRUPTIBLE); - cfs_waitq_add(&debug_ctlwq, &wait); - - rc = cfs_kernel_thread(libcfs_debug_dumplog_thread, - (void *)(long)cfs_curproc_pid(), - CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) - printk(KERN_ERR "LustreError: cannot start log dump thread: " - "%d\n", rc); - else - cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE); - - /* be sure to teardown if kernel_thread() failed */ - cfs_waitq_del(&debug_ctlwq, &wait); - 
set_current_state(TASK_RUNNING); -} - -int libcfs_debug_init(unsigned long bufsize) -{ - int rc = 0; - int max = libcfs_debug_mb; - - cfs_waitq_init(&debug_ctlwq); - libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; - libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; - /* If libcfs_debug_mb is set to an invalid value or uninitialized - * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES */ - if (max > trace_max_debug_mb() || max < num_possible_cpus()) { - max = TCD_MAX_PAGES; - } else { - max = (max / num_possible_cpus()); - max = (max << (20 - CFS_PAGE_SHIFT)); - } - rc = tracefile_init(max); - - if (rc == 0) - libcfs_register_panic_notifier(); - - return rc; -} - -int libcfs_debug_cleanup(void) -{ - libcfs_unregister_panic_notifier(); - tracefile_exit(); - return 0; -} - -int libcfs_debug_clear_buffer(void) -{ - trace_flush_pages(); - return 0; -} - -/* Debug markers, although printed by S_LNET - * should not be be marked as such. */ -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_UNDEFINED -int libcfs_debug_mark_buffer(char *text) -{ - CDEBUG(D_TRACE,"***************************************************\n"); - CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); - CDEBUG(D_TRACE,"***************************************************\n"); - - return 0; -} -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_LNET - -void libcfs_debug_set_level(unsigned int debug_level) -{ - printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n", - debug_level); - libcfs_debug = debug_level; -} - -EXPORT_SYMBOL(libcfs_debug_dumplog); -EXPORT_SYMBOL(libcfs_debug_set_level); - - -#else /* !__KERNEL__ */ - -#include <libcfs/libcfs.h> - -#ifdef HAVE_CATAMOUNT_DATA_H -#include <catamount/data.h> -#include <catamount/lputs.h> - -static char source_nid[16]; -/* 0 indicates no messages to console, 1 is errors, > 1 is all debug messages */ -static int toconsole = 1; -unsigned int libcfs_console_ratelimit = 1; -cfs_duration_t libcfs_console_max_delay; -cfs_duration_t 
libcfs_console_min_delay; -unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; -#else /* !HAVE_CATAMOUNT_DATA_H */ -#ifdef HAVE_NETDB_H -#include <sys/utsname.h> -#endif /* HAVE_NETDB_H */ -struct utsname *tmp_utsname; -static char source_nid[sizeof(tmp_utsname->nodename)]; -#endif /* HAVE_CATAMOUNT_DATA_H */ - -static int source_pid; -int smp_processor_id = 1; -char debug_file_path[1024]; -FILE *debug_file_fd; - -int portals_do_debug_dumplog(void *arg) -{ - printf("Look in %s\n", debug_file_name); - return 0; -} - - -void portals_debug_print(void) -{ - return; -} - - -void libcfs_debug_dumplog(void) -{ - printf("Look in %s\n", debug_file_name); - return; -} - -int libcfs_debug_init(unsigned long bufsize) -{ - char *debug_mask = NULL; - char *debug_subsys = NULL; - char *debug_filename; - -#ifdef HAVE_CATAMOUNT_DATA_H - char *debug_console = NULL; - char *debug_ratelimit = NULL; - char *debug_max_delay = NULL; - char *debug_min_delay = NULL; - char *debug_backoff = NULL; - - libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; - libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; - - snprintf(source_nid, sizeof(source_nid) - 1, "%u", _my_pnid); - source_pid = _my_pid; - - debug_console = getenv("LIBLUSTRE_DEBUG_CONSOLE"); - if (debug_console != NULL) { - toconsole = strtoul(debug_console, NULL, 0); - CDEBUG(D_INFO, "set liblustre toconsole to %u\n", toconsole); - } - debug_ratelimit = getenv("LIBLUSTRE_DEBUG_CONSOLE_RATELIMIT"); - if (debug_ratelimit != NULL) { - libcfs_console_ratelimit = strtoul(debug_ratelimit, NULL, 0); - CDEBUG(D_INFO, "set liblustre console ratelimit to %u\n", - libcfs_console_ratelimit); - } - debug_max_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY"); - if (debug_max_delay != NULL) - libcfs_console_max_delay = - cfs_time_seconds(strtoul(debug_max_delay, NULL, 0)); - debug_min_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY"); - if (debug_min_delay != NULL) - libcfs_console_min_delay = - 
cfs_time_seconds(strtoul(debug_min_delay, NULL, 0)); - if (debug_min_delay || debug_max_delay) { - if (!libcfs_console_max_delay || !libcfs_console_min_delay || - libcfs_console_max_delay < libcfs_console_min_delay) { - libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; - libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; - CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY " - "should be greater than " - "LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY " - "and both parameters should be non-null" - ": restore default values\n"); - } else { - CDEBUG(D_INFO, "set liblustre console max delay to %lus" - " and min delay to %lus\n", - (cfs_duration_t) - cfs_duration_sec(libcfs_console_max_delay), - (cfs_duration_t) - cfs_duration_sec(libcfs_console_min_delay)); - } - } - debug_backoff = getenv("LIBLUSTRE_DEBUG_CONSOLE_BACKOFF"); - if (debug_backoff != NULL) { - libcfs_console_backoff = strtoul(debug_backoff, NULL, 0); - if (libcfs_console_backoff <= 0) { - libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; - CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_BACKOFF <= 0: " - "restore default value\n"); - } else { - CDEBUG(D_INFO, "set liblustre console backoff to %u\n", - libcfs_console_backoff); - } - } -#else - struct utsname myname; - - if (uname(&myname) == 0) - strcpy(source_nid, myname.nodename); - source_pid = getpid(); -#endif - /* debug masks */ - debug_mask = getenv("LIBLUSTRE_DEBUG_MASK"); - if (debug_mask) - libcfs_debug = (unsigned int) strtol(debug_mask, NULL, 0); - - debug_subsys = getenv("LIBLUSTRE_DEBUG_SUBSYS"); - if (debug_subsys) - libcfs_subsystem_debug = - (unsigned int) strtol(debug_subsys, NULL, 0); - - debug_filename = getenv("LIBLUSTRE_DEBUG_BASE"); - if (debug_filename) - strncpy(debug_file_path,debug_filename,sizeof(debug_file_path)); - - debug_filename = getenv("LIBLUSTRE_DEBUG_FILE"); - if (debug_filename) - strncpy(debug_file_name,debug_filename,sizeof(debug_file_name)); - - if (debug_file_name[0] == '\0' && debug_file_path[0] != '\0') - 
snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s-%s-"CFS_TIME_T".log", debug_file_path, source_nid, time(0)); - - if (strcmp(debug_file_name, "stdout") == 0 || - strcmp(debug_file_name, "-") == 0) { - debug_file_fd = stdout; - } else if (strcmp(debug_file_name, "stderr") == 0) { - debug_file_fd = stderr; - } else if (debug_file_name[0] != '\0') { - debug_file_fd = fopen(debug_file_name, "w"); - if (debug_file_fd == NULL) - fprintf(stderr, "%s: unable to open '%s': %s\n", - source_nid, debug_file_name, strerror(errno)); - } - - if (debug_file_fd == NULL) - debug_file_fd = stdout; - - return 0; -} - -int libcfs_debug_cleanup(void) -{ - if (debug_file_fd != stdout && debug_file_fd != stderr) - fclose(debug_file_fd); - return 0; -} - -int libcfs_debug_clear_buffer(void) -{ - return 0; -} - -int libcfs_debug_mark_buffer(char *text) -{ - - fprintf(debug_file_fd, "*******************************************************************************\n"); - fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); - fprintf(debug_file_fd, "*******************************************************************************\n"); - - return 0; -} - -#ifdef HAVE_CATAMOUNT_DATA_H -#define CATAMOUNT_MAXLINE (256-4) -void catamount_printline(char *buf, size_t size) -{ - char *pos = buf; - int prsize = size; - - while (prsize > 0){ - lputs(pos); - pos += CATAMOUNT_MAXLINE; - prsize -= CATAMOUNT_MAXLINE; - } -} -#endif - -int -libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, - int subsys, int mask, - const char *file, const char *fn, const int line, - const char *format1, va_list args, - const char *format2, ...) 
-{ - struct timeval tv; - int nob; - int remain; - va_list ap; - char buf[CFS_PAGE_SIZE]; /* size 4096 used for compatimble - * with linux, where message can`t - * be exceed PAGE_SIZE */ - int console = 0; - char *prefix = "Lustre"; - -#ifdef HAVE_CATAMOUNT_DATA_H - /* toconsole == 0 - all messages to debug_file_fd - * toconsole == 1 - warnings to console, all to debug_file_fd - * toconsole > 1 - all debug to console */ - if (((mask & libcfs_printk) && toconsole == 1) || toconsole > 1) - console = 1; -#endif - - if ((!console) && (!debug_file_fd)) { - return 0; - } - - if (mask & (D_EMERG | D_ERROR)) - prefix = "LustreError"; - - nob = snprintf(buf, sizeof(buf), "%s: %u-%s:(%s:%d:%s()): ", prefix, - source_pid, source_nid, file, line, fn); - - remain = sizeof(buf) - nob; - if (format1) { - nob += vsnprintf(&buf[nob], remain, format1, args); - } - - remain = sizeof(buf) - nob; - if ((format2) && (remain > 0)) { - va_start(ap, format2); - nob += vsnprintf(&buf[nob], remain, format2, ap); - va_end(ap); - } - -#ifdef HAVE_CATAMOUNT_DATA_H - if (console) { - /* check rate limit for console */ - if (cdls != NULL) { - if (libcfs_console_ratelimit && - cdls->cdls_next != 0 && /* not first time ever */ - !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { - - /* skipping a console message */ - cdls->cdls_count++; - goto out_file; - } - - if (cfs_time_after(cfs_time_current(), cdls->cdls_next + - libcfs_console_max_delay + - cfs_time_seconds(10))) { - /* last timeout was a long time ago */ - cdls->cdls_delay /= libcfs_console_backoff * 4; - } else { - cdls->cdls_delay *= libcfs_console_backoff; - - if (cdls->cdls_delay < - libcfs_console_min_delay) - cdls->cdls_delay = - libcfs_console_min_delay; - else if (cdls->cdls_delay > - libcfs_console_max_delay) - cdls->cdls_delay = - libcfs_console_max_delay; - } - - /* ensure cdls_next is never zero after it's been seen */ - cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; - } - - if (cdls != NULL && 
cdls->cdls_count != 0) { - char buf2[100]; - - nob = snprintf(buf2, sizeof(buf2), - "Skipped %d previous similar message%s\n", - cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); - - catamount_printline(buf2, nob); - cdls->cdls_count = 0; - goto out_file; - } - catamount_printline(buf, nob); - } -out_file: - /* return on toconsole > 1, as we don't want the user getting - * spammed by the debug data */ - if (toconsole > 1) - return 0; -#endif - if (debug_file_fd == NULL) - return 0; - - gettimeofday(&tv, NULL); - - fprintf(debug_file_fd, CFS_TIME_T".%06lu:%u:%s:(%s:%d:%s()): %s", - tv.tv_sec, tv.tv_usec, source_pid, source_nid, - file, line, fn, buf); - - return 0; -} - -void -libcfs_assertion_failed(const char *expr, const char *file, const char *func, - const int line) -{ - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, - "ASSERTION(%s) failed\n", expr); - abort(); -} - -#endif /* __KERNEL__ */ diff --git a/lnet/libcfs/libcfs.xcode/project.pbxproj b/lnet/libcfs/libcfs.xcode/project.pbxproj deleted file mode 100644 index 479c21b977925d925173db71f4d96a33b2487732..0000000000000000000000000000000000000000 --- a/lnet/libcfs/libcfs.xcode/project.pbxproj +++ /dev/null @@ -1,439 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = libcfs; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 19444794072D07AD00DAF9BC = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = tracefile.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19444795072D07AD00DAF9BC = { - fileRef = 19444794072D07AD00DAF9BC; - isa = PBXBuildFile; - settings = { - }; - }; - 19444796072D08AA00DAF9BC = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = debug.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19444797072D08AA00DAF9BC = { - fileRef = 19444796072D08AA00DAF9BC; - 
isa = PBXBuildFile; - settings = { - }; - }; - 19509C03072CD5FF00A958C3 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = module.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19509C04072CD5FF00A958C3 = { - fileRef = 19509C03072CD5FF00A958C3; - isa = PBXBuildFile; - settings = { - }; - }; - 19713B76072E8274004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_prim.c; - path = arch/xnu/cfs_prim.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713B77072E8274004E8469 = { - fileRef = 19713B76072E8274004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713BB7072E8281004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_mem.c; - path = arch/xnu/cfs_mem.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713BB8072E8281004E8469 = { - fileRef = 19713BB7072E8281004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713BF7072E828E004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_proc.c; - path = arch/xnu/cfs_proc.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713BF8072E828E004E8469 = { - fileRef = 19713BF7072E828E004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713C7A072E82B2004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_utils.c; - path = arch/xnu/cfs_utils.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713C7B072E82B2004E8469 = { - fileRef = 19713C7A072E82B2004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713CD6072E8A56004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_module.c; - path = arch/xnu/cfs_module.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713CD7072E8A56004E8469 = { - fileRef = 19713CD6072E8A56004E8469; - isa = PBXBuildFile; - settings = { - }; - 
}; - 19713D1B072E8E39004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_fs.c; - path = arch/xnu/cfs_fs.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713D1C072E8E39004E8469 = { - fileRef = 19713D1B072E8E39004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713D60072E9109004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = xnu_sync.c; - path = arch/xnu/xnu_sync.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713D61072E9109004E8469 = { - fileRef = 19713D60072E9109004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713DC2072F994D004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_tracefile.c; - path = arch/xnu/cfs_tracefile.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713DC3072F994D004E8469 = { - fileRef = 19713DC2072F994D004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713E1C072FAFB5004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_debug.c; - path = arch/xnu/cfs_debug.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713E1D072FAFB5004E8469 = { - fileRef = 19713E1C072FAFB5004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 19713E1C072FAFB5004E8469, - 19713DC2072F994D004E8469, - 19713D60072E9109004E8469, - 19713D1B072E8E39004E8469, - 19713CD6072E8A56004E8469, - 19713C7A072E82B2004E8469, - 19713BF7072E828E004E8469, - 19713BB7072E8281004E8469, - 19713B76072E8274004E8469, - 19444796072D08AA00DAF9BC, - 19444794072D07AD00DAF9BC, - 19509C03072CD5FF00A958C3, - ); - isa = PBXGroup; - name = Source; - path = 
""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, - 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = ../include; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.libcfs; - MODULE_START = libcfs_start; - MODULE_STOP = libcfs_stop; - MODULE_VERSION = 1.0.1; - OTHER_CFLAGS = "-D__KERNEL__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = libcfs; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = libcfs; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = libcfs; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 19509C04072CD5FF00A958C3, - 19444795072D07AD00DAF9BC, - 19444797072D08AA00DAF9BC, - 19713B77072E8274004E8469, - 19713BB8072E8281004E8469, - 19713BF8072E828E004E8469, - 19713C7B072E82B2004E8469, - 19713CD7072E8A56004E8469, - 19713D1C072E8E39004E8469, - 19713D61072E9109004E8469, - 19713DC3072F994D004E8469, - 19713E1D072FAFB5004E8469, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = libcfs.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/libcfs/linux/.cvsignore b/lnet/libcfs/linux/.cvsignore deleted file mode 100644 index 2bc4137ea828524412757790d1c06c9ce24fec2c..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -Makefile -Makefile.in -*.o.cmd diff --git a/lnet/libcfs/linux/Makefile.am b/lnet/libcfs/linux/Makefile.am deleted file mode 100644 index 8bf35ccc7742ddb59c35df71b378309746440d5d..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/Makefile.am +++ /dev/null @@ -1,4 +0,0 @@ -EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \ - linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \ - linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c - diff --git a/lnet/libcfs/linux/linux-curproc.c b/lnet/libcfs/linux/linux-curproc.c deleted file mode 100644 index e446169b34b8da52ff8b7325e28c3394cedf9e9c..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-curproc.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre curproc API implementation for Linux kernel - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. 
Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - * Public License for more details. You should have received a copy of the GNU - * General Public License along with Lustre; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/sched.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) - * for Linux kernel. - */ - -uid_t cfs_curproc_uid(void) -{ - return current->uid; -} - -gid_t cfs_curproc_gid(void) -{ - return current->gid; -} - -uid_t cfs_curproc_fsuid(void) -{ - return current->fsuid; -} - -gid_t cfs_curproc_fsgid(void) -{ - return current->fsgid; -} - -pid_t cfs_curproc_pid(void) -{ - return current->pid; -} - -int cfs_curproc_groups_nr(void) -{ - int nr; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) - task_lock(current); - nr = current->group_info->ngroups; - task_unlock(current); -#else - nr = current->ngroups; -#endif - return nr; -} - -void cfs_curproc_groups_dump(gid_t *array, int size) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) - task_lock(current); - size = min_t(int, size, current->group_info->ngroups); - memcpy(array, current->group_info->blocks[0], size * sizeof(__u32)); - task_unlock(current); -#else - LASSERT(size <= NGROUPS); - size = min_t(int, size, current->ngroups); - memcpy(array, current->groups, size * sizeof(__u32)); -#endif -} - - -int cfs_curproc_is_in_groups(gid_t gid) -{ - return in_group_p(gid); -} - -mode_t cfs_curproc_umask(void) -{ - return current->fs->umask; -} - -char *cfs_curproc_comm(void) -{ - return current->comm; -} - -cfs_kernel_cap_t cfs_curproc_cap_get(void) -{ - return current->cap_effective; -} - -void cfs_curproc_cap_set(cfs_kernel_cap_t cap) -{ - 
current->cap_effective = cap; -} - -EXPORT_SYMBOL(cfs_curproc_uid); -EXPORT_SYMBOL(cfs_curproc_pid); -EXPORT_SYMBOL(cfs_curproc_gid); -EXPORT_SYMBOL(cfs_curproc_fsuid); -EXPORT_SYMBOL(cfs_curproc_fsgid); -EXPORT_SYMBOL(cfs_curproc_umask); -EXPORT_SYMBOL(cfs_curproc_comm); -EXPORT_SYMBOL(cfs_curproc_groups_nr); -EXPORT_SYMBOL(cfs_curproc_groups_dump); -EXPORT_SYMBOL(cfs_curproc_is_in_groups); -EXPORT_SYMBOL(cfs_curproc_cap_get); -EXPORT_SYMBOL(cfs_curproc_cap_set); - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/linux/linux-debug.c b/lnet/libcfs/linux/linux-debug.c deleted file mode 100644 index 9b2a9dc5c1659ad7dc5dca4813ba4cc5c18d57e9..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-debug.c +++ /dev/null @@ -1,239 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/notifier.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/interrupt.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/completion.h> - -#include <linux/fs.h> -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <linux/miscdevice.h> -#include <linux/version.h> - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/linux/portals_compat25.h> -#include <libcfs/libcfs.h> - -#include "tracefile.h" - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include <linux/kallsyms.h> -#endif - -char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; - -void libcfs_run_upcall(char **argv) -{ - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - ENTRY; - - argv[0] = lnet_upcall; - argc = 1; - while (argv[argc] != NULL) - argc++; - - LASSERT(argc >= 2); - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0 && rc != -ENOENT) { - CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; " - "check /proc/sys/lnet/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? 
"" : ",..."); - } -} - -void libcfs_run_lbug_upcall(char *file, const char *fn, const int line) -{ - char *argv[6]; - char buf[32]; - - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = (char *)fn; - argv[4] = buf; - argv[5] = NULL; - - libcfs_run_upcall (argv); -} - -#ifdef __arch_um__ -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, - "LBUG - trying to dump log to %s\n", debug_file_path); - libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(file, func, line); - asm("int $3"); - panic("LBUG"); -} -#else -/* coverity[+kill] */ -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n"); - - if (in_interrupt()) { - panic("LBUG in interrupt.\n"); - /* not reached */ - } - - libcfs_debug_dumpstack(NULL); - libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(file, func, line); - if (libcfs_panic_on_lbug) - panic("LBUG"); - set_task_state(current, TASK_UNINTERRUPTIBLE); - while (1) - schedule(); -} -#endif /* __arch_um__ */ - -#ifdef __KERNEL__ - -void libcfs_debug_dumpstack(struct task_struct *tsk) -{ -#if defined(__arch_um__) - if (tsk != NULL) - CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n", - tsk->pid, UML_PID(tsk)); - //asm("int $3"); -#elif defined(HAVE_SHOW_TASK) - /* this is exported by lustre kernel version 42 */ - extern void show_task(struct task_struct *); - - if (tsk == NULL) - tsk = current; - CWARN("showing stack for process %d\n", tsk->pid); - show_task(tsk); -#else - if ((tsk == NULL) || (tsk == current)) - dump_stack(); - else - CWARN("can't show stack: kernel doesn't export show_task\n"); -#endif -} - -cfs_task_t *libcfs_current(void) -{ - CWARN("current task struct is %p\n", current); - return current; -} - -static int panic_notifier(struct notifier_block *self, unsigned long unused1, - void 
*unused2) -{ - if (libcfs_panic_in_progress) - return 0; - - libcfs_panic_in_progress = 1; - mb(); - -#ifdef LNET_DUMP_ON_PANIC - /* This is currently disabled because it spews far too much to the - * console on the rare cases it is ever triggered. */ - - if (in_interrupt()) { - trace_debug_print(); - } else { - while (current->lock_depth >= 0) - unlock_kernel(); - - libcfs_debug_dumplog_internal((void *)(long)cfs_curproc_pid()); - } -#endif - return 0; -} - -static struct notifier_block libcfs_panic_notifier = { - notifier_call : panic_notifier, - next : NULL, - priority : 10000 -}; - -void libcfs_register_panic_notifier(void) -{ -#ifdef HAVE_ATOMIC_PANIC_NOTIFIER - atomic_notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier); -#else - notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier); -#endif -} - -void libcfs_unregister_panic_notifier(void) -{ -#ifdef HAVE_ATOMIC_PANIC_NOTIFIER - atomic_notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier); -#else - notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier); -#endif -} - -EXPORT_SYMBOL(libcfs_debug_dumpstack); -EXPORT_SYMBOL(libcfs_current); - -#endif /* __KERNEL__ */ - -EXPORT_SYMBOL(libcfs_run_upcall); -EXPORT_SYMBOL(libcfs_run_lbug_upcall); -EXPORT_SYMBOL(lbug_with_loc); diff --git a/lnet/libcfs/linux/linux-fs.c b/lnet/libcfs/linux/linux-fs.c deleted file mode 100644 index 3b15576c5c3df001f0479568f4b8fb8b9f210bae..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-fs.c +++ /dev/null @@ -1,100 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - -#include <linux/fs.h> -#include <linux/kdev_t.h> -#include <linux/ctype.h> -#include <asm/uaccess.h> - -#include <libcfs/libcfs.h> - -cfs_file_t * -cfs_filp_open (const char *name, int flags, int mode, int *err) -{ - /* XXX - * Maybe we need to handle flags and mode in the future - */ - cfs_file_t *filp = NULL; - - filp = filp_open(name, flags, mode); - if (IS_ERR(filp)) { - int rc; - - rc = 
PTR_ERR(filp); - printk(KERN_ERR "LustreError: can't open %s file: err %d\n", - name, rc); - if (err) - *err = rc; - filp = NULL; - } - return filp; -} - -/* write a userspace buffer to disk. - * NOTE: this returns 0 on success, not the number of bytes written. */ -ssize_t -cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset) -{ - mm_segment_t fs; - ssize_t size = 0; - - fs = get_fs(); - set_fs(KERNEL_DS); - while (count > 0) { - size = filp->f_op->write(filp, (char *)buf, count, offset); - if (size < 0) - break; - count -= size; - size = 0; - } - set_fs(fs); - - return size; -} - -#if !(CFS_O_CREAT == O_CREAT && CFS_O_EXCL == O_EXCL && \ - CFS_O_TRUNC == O_TRUNC && CFS_O_APPEND == O_APPEND &&\ - CFS_O_NONBLOCK == O_NONBLOCK && CFS_O_NDELAY == O_NDELAY &&\ - CFS_O_SYNC == O_SYNC && CFS_O_ASYNC == FASYNC &&\ - CFS_O_DIRECT == O_DIRECT && CFS_O_LARGEFILE == O_LARGEFILE &&\ - CFS_O_DIRECTORY == O_DIRECTORY && CFS_O_NOFOLLOW == O_NOFOLLOW) - -int cfs_oflags2univ(int flags) -{ - int f; - - f = flags & O_ACCMODE; - f |= (flags & O_CREAT) ? CFS_O_CREAT: 0; - f |= (flags & O_EXCL) ? CFS_O_EXCL: 0; - f |= (flags & O_NOCTTY) ? CFS_O_NOCTTY: 0; - f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0; - f |= (flags & O_APPEND) ? CFS_O_APPEND: 0; - f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0; - f |= (flags & O_SYNC)? CFS_O_SYNC: 0; - f |= (flags & FASYNC)? CFS_O_ASYNC: 0; - f |= (flags & O_DIRECTORY)? CFS_O_DIRECTORY: 0; - f |= (flags & O_DIRECT)? CFS_O_DIRECT: 0; - f |= (flags & O_LARGEFILE)? CFS_O_LARGEFILE: 0; - f |= (flags & O_NOFOLLOW)? CFS_O_NOFOLLOW: 0; - f |= (flags & O_NOATIME)? CFS_O_NOATIME: 0; - return f; -} -#else - -int cfs_oflags2univ(int flags) -{ - return (flags); -} -#endif - -/* - * XXX Liang: we don't need cfs_univ2oflags() now. 
- */ -int cfs_univ2oflags(int flags) -{ - return (flags); -} - -EXPORT_SYMBOL(cfs_filp_open); -EXPORT_SYMBOL(cfs_user_write); -EXPORT_SYMBOL(cfs_oflags2univ); -EXPORT_SYMBOL(cfs_univ2oflags); diff --git a/lnet/libcfs/linux/linux-lock.c b/lnet/libcfs/linux/linux-lock.c deleted file mode 100644 index 01511d6337a1ae5229c408e54daf2f51761b8b4b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-lock.c +++ /dev/null @@ -1,4 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - -#include <arch-linux/cfs_lock.h> -#include <libcfs/libcfs.h> diff --git a/lnet/libcfs/linux/linux-lwt.c b/lnet/libcfs/linux/linux-lwt.c deleted file mode 100644 index 520c54ce68573bf1c5df463fdc2583df116429a1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-lwt.c +++ /dev/null @@ -1,2 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - diff --git a/lnet/libcfs/linux/linux-mem.c b/lnet/libcfs/linux/linux-mem.c deleted file mode 100644 index 30ecf6ad7255071bd8bdff6ad3fde297633187b7..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-mem.c +++ /dev/null @@ -1,145 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <libcfs/libcfs.h> - -static unsigned int cfs_alloc_flags_to_gfp(u_int32_t flags) -{ - unsigned int mflags = 0; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - if (flags & CFS_ALLOC_ATOMIC) - mflags |= __GFP_HIGH; - else if (flags & CFS_ALLOC_WAIT) - mflags |= __GFP_WAIT; - else - mflags |= (__GFP_HIGH | __GFP_WAIT); - if (flags & CFS_ALLOC_IO) - mflags |= __GFP_IO | __GFP_HIGHIO; -#else - if (flags & CFS_ALLOC_ATOMIC) - mflags |= __GFP_HIGH; - else - mflags |= __GFP_WAIT; - if (flags & CFS_ALLOC_NOWARN) - mflags |= __GFP_NOWARN; - if (flags & CFS_ALLOC_IO) - mflags |= __GFP_IO; -#endif - if (flags & CFS_ALLOC_FS) - mflags |= __GFP_FS; - if (flags & CFS_ALLOC_HIGH) - mflags |= __GFP_HIGH; - return mflags; -} - -void * -cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - void *ptr = NULL; - - ptr = kmalloc(nr_bytes, cfs_alloc_flags_to_gfp(flags)); - if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) - memset(ptr, 0, nr_bytes); - return ptr; -} - -void -cfs_free(void *addr) -{ - kfree(addr); -} - -void * -cfs_alloc_large(size_t nr_bytes) -{ - return vmalloc(nr_bytes); -} - -void -cfs_free_large(void *addr) -{ - vfree(addr); -} - -cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order) -{ - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - return alloc_pages(cfs_alloc_flags_to_gfp(flags), order); -} - -void __cfs_free_pages(cfs_page_t *page, unsigned int order) -{ - __free_pages(page, order); -} - -cfs_mem_cache_t * -cfs_mem_cache_create (const char *name, size_t size, size_t offset, - unsigned long flags) -{ -#ifdef HAVE_KMEM_CACHE_CREATE_DTOR - return kmem_cache_create(name, size, offset, flags, NULL, NULL); -#else - return kmem_cache_create(name, size, offset, flags, NULL); -#endif -} - -int -cfs_mem_cache_destroy (cfs_mem_cache_t * cachep) -{ -#ifdef HAVE_KMEM_CACHE_DESTROY_INT - return kmem_cache_destroy(cachep); -#else - kmem_cache_destroy(cachep); - return 0; -#endif -} - -void * -cfs_mem_cache_alloc(cfs_mem_cache_t *cachep, int flags) -{ - return kmem_cache_alloc(cachep, cfs_alloc_flags_to_gfp(flags)); -} - -void -cfs_mem_cache_free(cfs_mem_cache_t *cachep, void *objp) -{ - return kmem_cache_free(cachep, objp); -} - -EXPORT_SYMBOL(cfs_alloc); -EXPORT_SYMBOL(cfs_free); -EXPORT_SYMBOL(cfs_alloc_large); -EXPORT_SYMBOL(cfs_free_large); -EXPORT_SYMBOL(cfs_alloc_pages); -EXPORT_SYMBOL(__cfs_free_pages); -EXPORT_SYMBOL(cfs_mem_cache_create); -EXPORT_SYMBOL(cfs_mem_cache_destroy); -EXPORT_SYMBOL(cfs_mem_cache_alloc); -EXPORT_SYMBOL(cfs_mem_cache_free); diff --git a/lnet/libcfs/linux/linux-module.c b/lnet/libcfs/linux/linux-module.c deleted file mode 100644 index 6f21853bfc387be7438b722a64433785fa6fe93f..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-module.c +++ /dev/null @@ -1,151 +0,0 @@ -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#define LNET_MINOR 240 - -int libcfs_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct libcfs_ioctl_hdr *hdr; - struct libcfs_ioctl_data *data; - int err; - ENTRY; - - hdr = (struct libcfs_ioctl_hdr *)buf; - data = (struct libcfs_ioctl_data *)buf; - - err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); - if (err) - RETURN(err); - - if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { - 
CERROR("PORTALS: version mismatch kernel vs application\n"); - RETURN(-EINVAL); - } - - if (hdr->ioc_len + buf >= end) { - CERROR("PORTALS: user buffer exceeds kernel buffer\n"); - RETURN(-EINVAL); - } - - - if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { - CERROR("PORTALS: user buffer too small for ioctl\n"); - RETURN(-EINVAL); - } - - err = copy_from_user(buf, (void *)arg, hdr->ioc_len); - if (err) - RETURN(err); - - if (libcfs_ioctl_is_invalid(data)) { - CERROR("PORTALS: ioctl not correctly formatted\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); - - RETURN(0); -} - -int libcfs_ioctl_popdata(void *arg, void *data, int size) -{ - if (copy_to_user((char *)arg, data, size)) - return -EFAULT; - return 0; -} - -extern struct cfs_psdev_ops libcfs_psdev_ops; - -static int -libcfs_psdev_open(struct inode * inode, struct file * file) -{ - struct libcfs_device_userstate **pdu = NULL; - int rc = 0; - - if (!inode) - return (-EINVAL); - pdu = (struct libcfs_device_userstate **)&file->private_data; - if (libcfs_psdev_ops.p_open != NULL) - rc = libcfs_psdev_ops.p_open(0, (void *)pdu); - else - return (-EPERM); - return rc; -} - -/* called when closing /dev/device */ -static int -libcfs_psdev_release(struct inode * inode, struct file * file) -{ - struct libcfs_device_userstate *pdu; - int rc = 0; - - if (!inode) - return (-EINVAL); - pdu = file->private_data; - if (libcfs_psdev_ops.p_close != NULL) - rc = libcfs_psdev_ops.p_close(0, (void *)pdu); - else - rc = -EPERM; - return rc; -} - -static int -libcfs_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct cfs_psdev_file pfile; - int rc = 0; - - if (current->fsuid != 0) - return -EACCES; - - if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || - _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || - _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { - CDEBUG(D_IOCTL, 
"invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - return (-EINVAL); - } - - /* Handle platform-dependent IOC requests */ - switch (cmd) { - case IOC_LIBCFS_PANIC: - if (!capable (CAP_SYS_BOOT)) - return (-EPERM); - panic("debugctl-invoked panic"); - return (0); - case IOC_LIBCFS_MEMHOG: - if (!capable (CAP_SYS_ADMIN)) - return -EPERM; - /* go thought */ - } - - pfile.off = 0; - pfile.private_data = file->private_data; - if (libcfs_psdev_ops.p_ioctl != NULL) - rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); - else - rc = -EPERM; - return (rc); -} - -static struct file_operations libcfs_fops = { - ioctl: libcfs_ioctl, - open: libcfs_psdev_open, - release: libcfs_psdev_release -}; - -cfs_psdev_t libcfs_dev = { - LNET_MINOR, - "lnet", - &libcfs_fops -}; - - diff --git a/lnet/libcfs/linux/linux-prim.c b/lnet/libcfs/linux/linux-prim.c deleted file mode 100644 index cc028294429178c0866b1142f6a3790d536515e9..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-prim.c +++ /dev/null @@ -1,154 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <libcfs/libcfs.h> - -#if defined(CONFIG_KGDB) -#include <asm/kgdb.h> -#endif - -void cfs_enter_debugger(void) -{ -#if defined(CONFIG_KGDB) - BREAKPOINT(); -#elif defined(__arch_um__) - asm("int $3"); -#else - /* nothing */ -#endif -} - -void cfs_daemonize(char *str) { - unsigned long flags; - - lock_kernel(); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) - daemonize(str); -#else - daemonize(); - exit_files(current); - reparent_to_init(); - snprintf (current->comm, sizeof (current->comm), "%s", str); -#endif - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - unlock_kernel(); -} - -int cfs_daemonize_ctxt(char *str) { - struct task_struct *tsk = current; - struct fs_struct *fs = NULL; - - cfs_daemonize(str); - fs = copy_fs_struct(tsk->fs); - if (fs == NULL) - return -ENOMEM; - exit_fs(tsk); - tsk->fs = fs; - return 0; -} - - -sigset_t -cfs_get_blockedsigs(void) -{ - unsigned long flags; - sigset_t old; - - SIGNAL_MASK_LOCK(current, flags); - old = current->blocked; - SIGNAL_MASK_UNLOCK(current, flags); - return old; -} - -sigset_t -cfs_block_allsigs(void) -{ - unsigned long flags; - sigset_t old; - - SIGNAL_MASK_LOCK(current, flags); - old = current->blocked; - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - return old; -} - -sigset_t -cfs_block_sigs(sigset_t bits) -{ - unsigned long flags; - sigset_t old; - - SIGNAL_MASK_LOCK(current, flags); - old = current->blocked; - current->blocked = bits; - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - return old; -} - -void -cfs_restore_sigs (cfs_sigset_t old) -{ - unsigned long flags; - - SIGNAL_MASK_LOCK(current, flags); - current->blocked = old; - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); -} - -int 
-cfs_signal_pending(void) -{ - return signal_pending(current); -} - -void -cfs_clear_sigpending(void) -{ - unsigned long flags; - - SIGNAL_MASK_LOCK(current, flags); - CLEAR_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); -} - -int -libcfs_arch_init(void) -{ - return 0; -} - -void -libcfs_arch_cleanup(void) -{ - return; -} - -EXPORT_SYMBOL(libcfs_arch_init); -EXPORT_SYMBOL(libcfs_arch_cleanup); -EXPORT_SYMBOL(cfs_daemonize); -EXPORT_SYMBOL(cfs_daemonize_ctxt); -EXPORT_SYMBOL(cfs_block_allsigs); -EXPORT_SYMBOL(cfs_block_sigs); -EXPORT_SYMBOL(cfs_get_blockedsigs); -EXPORT_SYMBOL(cfs_restore_sigs); -EXPORT_SYMBOL(cfs_signal_pending); -EXPORT_SYMBOL(cfs_clear_sigpending); diff --git a/lnet/libcfs/linux/linux-proc.c b/lnet/libcfs/linux/linux-proc.c deleted file mode 100644 index ae3312a50c3ee2a4072fa57651df44814a104bba..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-proc.c +++ /dev/null @@ -1,443 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <asm/uaccess.h> - -#include <linux/proc_fs.h> -#include <linux/sysctl.h> - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <asm/div64.h> -#include "tracefile.h" - -static cfs_sysctl_table_header_t *lnet_table_header = NULL; -extern char lnet_upcall[1024]; - -#define PSDEV_LNET (0x100) -enum { - PSDEV_DEBUG = 1, /* control debugging */ - PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ - PSDEV_PRINTK, /* force all messages to console */ - PSDEV_CONSOLE_RATELIMIT, /* ratelimit console messages */ - PSDEV_CONSOLE_MAX_DELAY_CS, /* maximum delay over which we skip messages */ - PSDEV_CONSOLE_MIN_DELAY_CS, /* initial delay over which we skip messages */ - PSDEV_CONSOLE_BACKOFF, /* delay increase factor */ - PSDEV_DEBUG_PATH, /* crashdump log location */ - PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ - PSDEV_LNET_UPCALL, /* User mode upcall script */ - PSDEV_LNET_MEMUSED, /* bytes currently PORTAL_ALLOCated */ - PSDEV_LNET_CATASTROPHE, /* if we have LBUGged or panic'd */ - PSDEV_LNET_PANIC_ON_LBUG, /* flag to panic on LBUG */ - PSDEV_LNET_DUMP_KERNEL, /* snapshot kernel debug buffer to file */ - PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */ - PSDEV_LNET_DEBUG_MB, /* size of debug buffer */ -}; - -static int -proc_call_handler(void *data, int write, - loff_t *ppos, void *buffer, size_t *lenp, - int (*handler)(void *data, int write, - loff_t pos, void *buffer, int len)) -{ - int rc = 
handler(data, write, *ppos, buffer, *lenp); - - if (rc < 0) - return rc; - - if (write) { - *ppos += *lenp; - } else { - *lenp = rc; - *ppos += rc; - } - return 0; -} - -#define DECLARE_PROC_HANDLER(name) \ -static int \ -LL_PROC_PROTO(name) \ -{ \ - DECLARE_LL_PROC_PPOS_DECL; \ - \ - return proc_call_handler(table->data, write, \ - ppos, buffer, lenp, \ - __##name); \ -} - -static int __proc_dobitmasks(void *data, int write, - loff_t pos, void *buffer, int nob) -{ - const int tmpstrlen = 512; - char *tmpstr; - int rc; - unsigned int *mask = data; - int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; - int is_printk = (mask == &libcfs_printk) ? 1 : 0; - - rc = trace_allocate_string_buffer(&tmpstr, tmpstrlen); - if (rc < 0) - return rc; - - if (!write) { - libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys); - rc = strlen(tmpstr); - - if (pos >= rc) { - rc = 0; - } else { - rc = trace_copyout_string(buffer, nob, - tmpstr + pos, "\n"); - } - } else { - rc = trace_copyin_string(tmpstr, tmpstrlen, buffer, nob); - if (rc < 0) - return rc; - - rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys); - /* Always print LBUG/LASSERT to console, so keep this mask */ - if (is_printk) - *mask |= D_EMERG; - } - - trace_free_string_buffer(tmpstr, tmpstrlen); - return rc; -} - -DECLARE_PROC_HANDLER(proc_dobitmasks) - -static int __proc_dump_kernel(void *data, int write, - loff_t pos, void *buffer, int nob) -{ - if (!write) - return 0; - - return trace_dump_debug_buffer_usrstr(buffer, nob); -} - -DECLARE_PROC_HANDLER(proc_dump_kernel) - -static int __proc_daemon_file(void *data, int write, - loff_t pos, void *buffer, int nob) -{ - if (!write) { - int len = strlen(tracefile); - - if (pos >= len) - return 0; - - return trace_copyout_string(buffer, nob, - tracefile + pos, "\n"); - } - - return trace_daemon_command_usrstr(buffer, nob); -} - -DECLARE_PROC_HANDLER(proc_daemon_file) - -static int __proc_debug_mb(void *data, int write, - loff_t pos, void *buffer, int nob) -{ - 
if (!write) { - char tmpstr[32]; - int len = snprintf(tmpstr, sizeof(tmpstr), "%d", - trace_get_debug_mb()); - - if (pos >= len) - return 0; - - return trace_copyout_string(buffer, nob, tmpstr + pos, "\n"); - } - - return trace_set_debug_mb_usrstr(buffer, nob); -} - -DECLARE_PROC_HANDLER(proc_debug_mb) - -int LL_PROC_PROTO(proc_console_max_delay_cs) -{ - int rc, max_delay_cs; - cfs_sysctl_table_t dummy = *table; - cfs_duration_t d; - - dummy.data = &max_delay_cs; - dummy.proc_handler = &proc_dointvec; - - if (!write) { /* read */ - max_delay_cs = cfs_duration_sec(libcfs_console_max_delay * 100); - rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); - return rc; - } - - /* write */ - max_delay_cs = 0; - rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); - if (rc < 0) - return rc; - if (max_delay_cs <= 0) - return -EINVAL; - - d = cfs_time_seconds(max_delay_cs) / 100; - if (d == 0 || d < libcfs_console_min_delay) - return -EINVAL; - libcfs_console_max_delay = d; - - return rc; -} - -int LL_PROC_PROTO(proc_console_min_delay_cs) -{ - int rc, min_delay_cs; - cfs_sysctl_table_t dummy = *table; - cfs_duration_t d; - - dummy.data = &min_delay_cs; - dummy.proc_handler = &proc_dointvec; - - if (!write) { /* read */ - min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100); - rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); - return rc; - } - - /* write */ - min_delay_cs = 0; - rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); - if (rc < 0) - return rc; - if (min_delay_cs <= 0) - return -EINVAL; - - d = cfs_time_seconds(min_delay_cs) / 100; - if (d == 0 || d > libcfs_console_max_delay) - return -EINVAL; - libcfs_console_min_delay = d; - - return rc; -} - -int LL_PROC_PROTO(proc_console_backoff) -{ - int rc, backoff; - cfs_sysctl_table_t dummy = *table; - - dummy.data = &backoff; - dummy.proc_handler = &proc_dointvec; - - if (!write) { /* read */ - backoff= libcfs_console_backoff; - rc = ll_proc_dointvec(&dummy, 
write, filp, buffer, lenp, ppos); - return rc; - } - - /* write */ - backoff = 0; - rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); - if (rc < 0) - return rc; - if (backoff <= 0) - return -EINVAL; - - libcfs_console_backoff = backoff; - - return rc; -} - -static cfs_sysctl_table_t lnet_table[] = { - /* - * NB No .strategy entries have been provided since sysctl(8) prefers - * to go via /proc for portability. - */ - { - .ctl_name = PSDEV_DEBUG, - .procname = "debug", - .data = &libcfs_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks - }, - { - .ctl_name = PSDEV_SUBSYSTEM_DEBUG, - .procname = "subsystem_debug", - .data = &libcfs_subsystem_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks - }, - { - .ctl_name = PSDEV_PRINTK, - .procname = "printk", - .data = &libcfs_printk, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks - }, - { - .ctl_name = PSDEV_CONSOLE_RATELIMIT, - .procname = "console_ratelimit", - .data = &libcfs_console_ratelimit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = PSDEV_CONSOLE_MAX_DELAY_CS, - .procname = "console_max_delay_centisecs", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_console_max_delay_cs - }, - { - .ctl_name = PSDEV_CONSOLE_MIN_DELAY_CS, - .procname = "console_min_delay_centisecs", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_console_min_delay_cs - }, - { - .ctl_name = PSDEV_CONSOLE_BACKOFF, - .procname = "console_backoff", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_console_backoff - }, - - { - .ctl_name = PSDEV_DEBUG_PATH, - .procname = "debug_path", - .data = debug_file_path, - .maxlen = sizeof(debug_file_path), - .mode = 0644, - .proc_handler = &proc_dostring, - }, - - { - .ctl_name = PSDEV_LNET_UPCALL, - .procname = "upcall", - .data = lnet_upcall, - .maxlen = sizeof(lnet_upcall), - .mode = 0644, - .proc_handler 
= &proc_dostring, - }, - { - .ctl_name = PSDEV_LNET_MEMUSED, - .procname = "memused", - .data = (int *)&libcfs_kmemory.counter, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = PSDEV_LNET_CATASTROPHE, - .procname = "catastrophe", - .data = &libcfs_catastrophe, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = PSDEV_LNET_PANIC_ON_LBUG, - .procname = "panic_on_lbug", - .data = &libcfs_panic_on_lbug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = PSDEV_LNET_DUMP_KERNEL, - .procname = "dump_kernel", - .mode = 0200, - .proc_handler = &proc_dump_kernel, - }, - { - .ctl_name = PSDEV_LNET_DAEMON_FILE, - .procname = "daemon_file", - .mode = 0644, - .proc_handler = &proc_daemon_file, - }, - { - .ctl_name = PSDEV_LNET_DEBUG_MB, - .procname = "debug_mb", - .mode = 0644, - .proc_handler = &proc_debug_mb, - }, - {0} -}; - -static cfs_sysctl_table_t top_table[2] = { - { - .ctl_name = PSDEV_LNET, - .procname = "lnet", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = lnet_table - }, - {0} -}; - -int insert_proc(void) -{ -#ifdef CONFIG_SYSCTL - if (lnet_table_header == NULL) - lnet_table_header = cfs_register_sysctl_table(top_table, 0); -#endif - return 0; -} - -void remove_proc(void) -{ -#ifdef CONFIG_SYSCTL - if (lnet_table_header != NULL) - cfs_unregister_sysctl_table(lnet_table_header); - - lnet_table_header = NULL; -#endif -} diff --git a/lnet/libcfs/linux/linux-sync.c b/lnet/libcfs/linux/linux-sync.c deleted file mode 100644 index 520c54ce68573bf1c5df463fdc2583df116429a1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-sync.c +++ /dev/null @@ -1,2 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - diff --git a/lnet/libcfs/linux/linux-tcpip.c b/lnet/libcfs/linux/linux-tcpip.c deleted file mode 100644 index e8ceafd7bb2c2594adee8ac6bf4c09373ffe2f60..0000000000000000000000000000000000000000 --- 
a/lnet/libcfs/linux/linux-tcpip.c +++ /dev/null @@ -1,683 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> - -#include <linux/if.h> -#include <linux/in.h> -#include <linux/file.h> -/* For sys_open & sys_close */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -#include <linux/syscalls.h> -#else -#include <linux/fs.h> -#endif - -int -libcfs_sock_ioctl(int cmd, unsigned long arg) -{ - mm_segment_t oldmm = get_fs(); - struct socket *sock; - int fd; - int rc; - struct file *sock_filp; - - rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return rc; - } - - fd = sock_map_fd(sock); - if (fd < 0) { - rc = fd; - sock_release(sock); - goto out; - } - - sock_filp = fget(fd); - if (!sock_filp) { - rc = -ENOMEM; - goto out_fd; - } - - set_fs(KERNEL_DS); -#ifdef HAVE_UNLOCKED_IOCTL - if (sock_filp->f_op->unlocked_ioctl) - rc = sock_filp->f_op->unlocked_ioctl(sock_filp, cmd, arg); - else -#endif - { - lock_kernel(); - rc =sock_filp->f_op->ioctl(sock_filp->f_dentry->d_inode, - sock_filp, cmd, arg); - unlock_kernel(); - } - 
set_fs(oldmm); - - fput(sock_filp); - - out_fd: - sys_close(fd); - out: - return rc; -} - -int -libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct ifreq ifr; - int nob; - int rc; - __u32 val; - - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - return -EINVAL; - } - - CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); - - strcpy(ifr.ifr_name, name); - rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - return rc; - } - - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - return 0; - } - - *up = 1; - - strcpy(ifr.ifr_name, name); - ifr.ifr_addr.sa_family = AF_INET; - rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get IP address for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - strcpy(ifr.ifr_name, name); - ifr.ifr_addr.sa_family = AF_INET; - rc = libcfs_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get netmask for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; - *mask = ntohl(val); - - return 0; -} - -EXPORT_SYMBOL(libcfs_ipif_query); - -int -libcfs_ipif_enumerate (char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { - toobig = 1; - nalloc = CFS_PAGE_SIZE/sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { - CERROR ("ENOMEM 
enumerating up to %d interfaces\n", nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - - rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); - - if (rc < 0) { - CERROR ("Error %d enumerating interfaces\n", rc); - goto out1; - } - - LASSERT (rc == 0); - - nfound = ifc.ifc_len/sizeof(*ifr); - LASSERT (nfound <= nalloc); - - if (nfound < nalloc || toobig) - break; - - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - nalloc *= 2; - } - - if (nfound == 0) - goto out1; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out1; - } - /* NULL out all names[i] */ - memset (names, 0, nfound * sizeof(*names)); - - for (i = 0; i < nfound; i++) { - - nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - - out2: - if (rc < 0) - libcfs_ipif_free_enumeration(names, nfound); - out1: - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - out0: - return rc; -} - -EXPORT_SYMBOL(libcfs_ipif_enumerate); - -void -libcfs_ipif_free_enumeration (char **names, int n) -{ - int i; - - LASSERT (n > 0); - - for (i = 0; i < n && names[i] != NULL; i++) - LIBCFS_FREE(names[i], IFNAMSIZ); - - LIBCFS_FREE(names, n * sizeof(*names)); -} - -EXPORT_SYMBOL(libcfs_ipif_free_enumeration); - -int -libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - mm_segment_t oldmm = get_fs(); - long ticks = timeout * HZ; - unsigned long then; - struct timeval tv; - - LASSERT (nob > 0); - /* Caller may pass a zero timeout if she thinks the socket buffer is - * empty enough to take the whole message immediately */ - 
- for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0 - }; - - if (timeout != 0) { - /* Set send timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = ticks / HZ, - .tv_usec = ((ticks % HZ) * 1000000) / HZ - }; - set_fs(KERNEL_DS); - rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, - (char *)&tv, sizeof(tv)); - set_fs(oldmm); - if (rc != 0) { - CERROR("Can't set socket send timeout " - "%ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - } - - set_fs (KERNEL_DS); - then = jiffies; - rc = sock_sendmsg (sock, &msg, iov.iov_len); - ticks -= jiffies - then; - set_fs (oldmm); - - if (rc == nob) - return 0; - - if (rc < 0) - return rc; - - if (rc == 0) { - CERROR ("Unexpected zero rc\n"); - return (-ECONNABORTED); - } - - if (ticks <= 0) - return -EAGAIN; - - buffer = ((char *)buffer) + rc; - nob -= rc; - } - - return (0); -} -EXPORT_SYMBOL(libcfs_sock_write); - -int -libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - mm_segment_t oldmm = get_fs(); - long ticks = timeout * HZ; - unsigned long then; - struct timeval tv; - - LASSERT (nob > 0); - LASSERT (ticks > 0); - - for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - - /* Set receive timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = ticks / HZ, - .tv_usec = ((ticks % HZ) * 1000000) / HZ - }; - set_fs(KERNEL_DS); - rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - (char *)&tv, sizeof(tv)); - set_fs(oldmm); - if (rc != 0) { - CERROR("Can't set socket recv timeout %ld.%06d: %d\n", - (long)tv.tv_sec, 
(int)tv.tv_usec, rc); - return rc; - } - - set_fs(KERNEL_DS); - then = jiffies; - rc = sock_recvmsg(sock, &msg, iov.iov_len, 0); - ticks -= jiffies - then; - set_fs(oldmm); - - if (rc < 0) - return rc; - - if (rc == 0) - return -ECONNRESET; - - buffer = ((char *)buffer) + rc; - nob -= rc; - - if (nob == 0) - return 0; - - if (ticks <= 0) - return -ETIMEDOUT; - } -} - -EXPORT_SYMBOL(libcfs_sock_read); - -static int -libcfs_sock_create (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port) -{ - struct sockaddr_in locaddr; - struct socket *sock; - int rc; - int option; - mm_segment_t oldmm = get_fs(); - - /* All errors are fatal except bind failure if the port is in use */ - *fatal = 1; - - rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); - *sockp = sock; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - option = 1; - rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); - goto failed; - } - - if (local_ip != 0 || local_port != 0) { - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = (local_ip == 0) ? 
- INADDR_ANY : htonl(local_ip); - - rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr, - sizeof(locaddr)); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto failed; - } - if (rc != 0) { - CERROR("Error trying to bind to port %d: %d\n", - local_port, rc); - goto failed; - } - } - - return 0; - - failed: - sock_release(sock); - return rc; -} - -int -libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize) -{ - mm_segment_t oldmm = get_fs(); - int option; - int rc; - - if (txbufsize != 0) { - option = txbufsize; - set_fs (KERNEL_DS); - rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - return (rc); - } - } - - if (rxbufsize != 0) { - option = rxbufsize; - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return (rc); - } - } - - return 0; -} - -EXPORT_SYMBOL(libcfs_sock_setbuf); - -int -libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port) -{ - struct sockaddr_in sin; - int len = sizeof (sin); - int rc; - - rc = sock->ops->getname (sock, (struct sockaddr *)&sin, &len, - remote ? 2 : 0); - if (rc != 0) { - CERROR ("Error %d getting sock %s IP/port\n", - rc, remote ? 
"peer" : "local"); - return rc; - } - - if (ip != NULL) - *ip = ntohl (sin.sin_addr.s_addr); - - if (port != NULL) - *port = ntohs (sin.sin_port); - - return 0; -} - -EXPORT_SYMBOL(libcfs_sock_getaddr); - -int -libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize) -{ - - if (txbufsize != NULL) { - *txbufsize = sock->sk->sk_sndbuf; - } - - if (rxbufsize != NULL) { - *rxbufsize = sock->sk->sk_rcvbuf; - } - - return 0; -} - -EXPORT_SYMBOL(libcfs_sock_getbuf); - -int -libcfs_sock_listen (struct socket **sockp, - __u32 local_ip, int local_port, int backlog) -{ - int fatal; - int rc; - - rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port); - if (rc != 0) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - } - - rc = (*sockp)->ops->listen(*sockp, backlog); - if (rc == 0) - return 0; - - CERROR("Can't set listen backlog %d: %d\n", backlog, rc); - sock_release(*sockp); - return rc; -} - -EXPORT_SYMBOL(libcfs_sock_listen); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) -int sock_create_lite(int family, int type, int protocol, struct socket **res) -{ - struct socket *sock; - - sock = sock_alloc(); - if (sock == NULL) - return -ENOMEM; - - sock->type = type; - *res = sock; - - return 0; -} -#endif - -int -libcfs_sock_accept (struct socket **newsockp, struct socket *sock) -{ - wait_queue_t wait; - struct socket *newsock; - int rc; - - init_waitqueue_entry(&wait, current); - - /* XXX this should add a ref to sock->ops->owner, if - * TCP could be a module */ - rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); - if (rc) { - CERROR("Can't allocate socket\n"); - return rc; - } - - newsock->ops = sock->ops; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(sock->sk->sk_sleep, &wait); - - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); - if (rc == -EAGAIN) { - /* Nothing ready, so wait for activity */ - schedule(); - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); - } 
- - remove_wait_queue(sock->sk->sk_sleep, &wait); - set_current_state(TASK_RUNNING); - - if (rc != 0) - goto failed; - - *newsockp = newsock; - return 0; - - failed: - sock_release(newsock); - return rc; -} - -EXPORT_SYMBOL(libcfs_sock_accept); - -void -libcfs_sock_abort_accept (struct socket *sock) -{ - wake_up_all(sock->sk->sk_sleep); -} - -EXPORT_SYMBOL(libcfs_sock_abort_accept); - -int -libcfs_sock_connect (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - struct sockaddr_in srvaddr; - int rc; - - rc = libcfs_sock_create(sockp, fatal, local_ip, local_port); - if (rc != 0) - return rc; - - memset (&srvaddr, 0, sizeof (srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(peer_port); - srvaddr.sin_addr.s_addr = htonl(peer_ip); - - rc = (*sockp)->ops->connect(*sockp, - (struct sockaddr *)&srvaddr, sizeof(srvaddr), - 0); - if (rc == 0) - return 0; - - /* EADDRNOTAVAIL probably means we're already connected to the same - * peer/port on the same local port on a differently typed - * connection. Let our caller retry with a different local - * port... */ - *fatal = !(rc == -EADDRNOTAVAIL); - - CDEBUG(*fatal ? 
D_NETERROR : D_NET, - "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - - sock_release(*sockp); - return rc; -} - -EXPORT_SYMBOL(libcfs_sock_connect); - -void -libcfs_sock_release (struct socket *sock) -{ - sock_release(sock); -} - -EXPORT_SYMBOL(libcfs_sock_release); diff --git a/lnet/libcfs/linux/linux-tracefile.c b/lnet/libcfs/linux/linux-tracefile.c deleted file mode 100644 index 59560278a5aad112ca440add917c7d633a514544..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-tracefile.c +++ /dev/null @@ -1,266 +0,0 @@ -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -#ifndef get_cpu -#define get_cpu() smp_processor_id() -#define put_cpu() do { } while (0) -#endif - -/* three types of trace_data in linux */ -enum { - TCD_TYPE_PROC = 0, - TCD_TYPE_SOFTIRQ, - TCD_TYPE_IRQ, - TCD_TYPE_MAX -}; - -/* percents to share the total debug memory for each type */ -static unsigned int pages_factor[TCD_TYPE_MAX] = { - 80, /* 80% pages for TCD_TYPE_PROC */ - 10, /* 10% pages for TCD_TYPE_SOFTIRQ */ - 10 /* 10% pages for TCD_TYPE_IRQ */ -}; - -char *trace_console_buffers[NR_CPUS][3]; - -struct rw_semaphore tracefile_sem; - -int tracefile_init_arch() -{ - int i; - int j; - struct trace_cpu_data *tcd; - - init_rwsem(&tracefile_sem); - - /* initialize trace_data */ - memset(trace_data, 0, sizeof(trace_data)); - for (i = 0; i < TCD_TYPE_MAX; i++) { - trace_data[i]=kmalloc(sizeof(union trace_data_union)*NR_CPUS, - GFP_KERNEL); - if (trace_data[i] == NULL) - goto out; - - } - - /* arch related info initialized */ - tcd_for_each(tcd, i, j) { - tcd->tcd_pages_factor = pages_factor[i]; - tcd->tcd_type = i; - tcd->tcd_cpu = j; - } - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) { - trace_console_buffers[i][j] = - kmalloc(TRACE_CONSOLE_BUFFER_SIZE, - GFP_KERNEL); - - if 
(trace_console_buffers[i][j] == NULL) - goto out; - } - - return 0; - -out: - tracefile_fini_arch(); - printk(KERN_ERR "lnet: No enough memory\n"); - return -ENOMEM; - -} - -void tracefile_fini_arch() -{ - int i; - int j; - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) - if (trace_console_buffers[i][j] != NULL) { - kfree(trace_console_buffers[i][j]); - trace_console_buffers[i][j] = NULL; - } - - for (i = 0; trace_data[i] != NULL; i++) { - kfree(trace_data[i]); - trace_data[i] = NULL; - } -} - -void tracefile_read_lock() -{ - down_read(&tracefile_sem); -} - -void tracefile_read_unlock() -{ - up_read(&tracefile_sem); -} - -void tracefile_write_lock() -{ - down_write(&tracefile_sem); -} - -void tracefile_write_unlock() -{ - up_write(&tracefile_sem); -} - -char * -trace_get_console_buffer(void) -{ - int cpu = get_cpu(); - int idx; - - if (in_irq()) { - idx = 0; - } else if (in_softirq()) { - idx = 1; - } else { - idx = 2; - } - - return trace_console_buffers[cpu][idx]; -} - -void -trace_put_console_buffer(char *buffer) -{ - put_cpu(); -} - -struct trace_cpu_data * -trace_get_tcd(void) -{ - int cpu; - - cpu = get_cpu(); - if (in_irq()) - return &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd; - else if (in_softirq()) - return &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd; - return &(*trace_data[TCD_TYPE_PROC])[cpu].tcd; -} - -void -trace_put_tcd (struct trace_cpu_data *tcd) -{ - put_cpu(); -} - -int trace_lock_tcd(struct trace_cpu_data *tcd) -{ - __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); - if (tcd->tcd_type == TCD_TYPE_IRQ) - local_irq_disable(); - else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ) - local_bh_disable(); - return 1; -} - -void trace_unlock_tcd(struct trace_cpu_data *tcd) -{ - __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); - if (tcd->tcd_type == TCD_TYPE_IRQ) - local_irq_enable(); - else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ) - local_bh_enable(); -} - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) -{ - /* - * XXX nikita: do NOT call 
portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - return tcd->tcd_cpu == tage->cpu; -} - -void -set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, - const int line, unsigned long stack) -{ - struct timeval tv; - - do_gettimeofday(&tv); - - header->ph_subsys = subsys; - header->ph_mask = mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_sec = (__u32)tv.tv_sec; - header->ph_usec = tv.tv_usec; - header->ph_stack = stack; - header->ph_pid = current->pid; - header->ph_line_num = line; -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - header->ph_extern_pid = current->thread.extern_pid; -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - header->ph_extern_pid = current->thread.mode.tt.extern_pid; -#else - header->ph_extern_pid = 0; -#endif - return; -} - -void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn) -{ - char *prefix = "Lustre", *ptype = NULL; - - if ((mask & D_EMERG) != 0) { - prefix = "LustreError"; - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = "LustreError"; - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = "Lustre"; - ptype = KERN_WARNING; - } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) { - prefix = "Lustre"; - ptype = KERN_INFO; - } - - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %.*s", ptype, prefix, len, buf); - } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, - hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); - } - return; -} - -int trace_max_debug_mb(void) -{ - int total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT)); - - return MAX(512, (total_mb * 80)/100); -} - -void -trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) -{ - cpumask_t cpus_allowed = current->cpus_allowed; - /* use cpus_allowed to quiet 2.4 UP kernel warning only */ - cpumask_t m = 
cpus_allowed; - int cpu; - - /* Run the given routine on every CPU in thread context */ - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { - if (!cpu_online(cpu)) - continue; - - cpus_clear(m); - cpu_set(cpu, m); - set_cpus_allowed(current, m); - - fn(arg); - - set_cpus_allowed(current, cpus_allowed); - } -} diff --git a/lnet/libcfs/linux/linux-utils.c b/lnet/libcfs/linux/linux-utils.c deleted file mode 100644 index 60f7cb879aabd86fc8a0b1fcf32ae85bc777e36b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-utils.c +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -/* - * miscellaneous libcfs stuff - */ -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/types.h> - -/* - * Convert server error code to client format. Error codes are from - * Linux errno.h, so for Linux client---identity. - */ -int convert_server_error(__u64 ecode) -{ - return ecode; -} -EXPORT_SYMBOL(convert_server_error); - -/* - * convert <fcntl.h> flag from client to server. 
- */ -int convert_client_oflag(int cflag, int *result) -{ - *result = cflag; - return 0; -} -EXPORT_SYMBOL(convert_client_oflag); - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{} - -EXPORT_SYMBOL(cfs_stack_trace_fill); - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - return NULL; -} -EXPORT_SYMBOL(cfs_stack_trace_frame); - diff --git a/lnet/libcfs/lwt.c b/lnet/libcfs/lwt.c deleted file mode 100644 index 6455ece775c6d78e3d01a77a65b42fdae3c507c8..0000000000000000000000000000000000000000 --- a/lnet/libcfs/lwt.c +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2003 Cluster File Systems, Inc. - * Author: Eric Barton <eeb@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifndef AUTOCONF_INCLUDED -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/kernel.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/interrupt.h> -#include <asm/system.h> -#include <asm/uaccess.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> - -#if LWT_SUPPORT - -#if !KLWT_SUPPORT -int lwt_enabled; -lwt_cpu_t lwt_cpus[NR_CPUS]; -#endif - -int lwt_pages_per_cpu; - -/* NB only root is allowed to retrieve LWT info; it's an open door into the - * kernel... */ - -int -lwt_lookup_string (int *size, char *knl_ptr, - char *user_ptr, int user_size) -{ - int maxsize = 128; - - /* knl_ptr was retrieved from an LWT snapshot and the caller wants to - * turn it into a string. NB we can crash with an access violation - * trying to determine the string length, so we're trusting our - * caller... */ - - if (!capable(CAP_SYS_ADMIN)) - return (-EPERM); - - if (user_size > 0 && - maxsize > user_size) - maxsize = user_size; - - *size = strnlen (knl_ptr, maxsize - 1) + 1; - - if (user_ptr != NULL) { - if (user_size < 4) - return (-EINVAL); - - if (copy_to_user (user_ptr, knl_ptr, *size)) - return (-EFAULT); - - /* Did I truncate the string? 
*/ - if (knl_ptr[*size - 1] != 0) - copy_to_user (user_ptr + *size - 4, "...", 4); - } - - return (0); -} - -int -lwt_control (int enable, int clear) -{ - lwt_page_t *p; - int i; - int j; - - if (!capable(CAP_SYS_ADMIN)) - return (-EPERM); - - if (!enable) { - LWT_EVENT(0,0,0,0); - lwt_enabled = 0; - mb(); - /* give people some time to stop adding traces */ - schedule_timeout(10); - } - - for (i = 0; i < num_online_cpus(); i++) { - p = lwt_cpus[i].lwtc_current_page; - - if (p == NULL) - return (-ENODATA); - - if (!clear) - continue; - - for (j = 0; j < lwt_pages_per_cpu; j++) { - memset (p->lwtp_events, 0, CFS_PAGE_SIZE); - - p = list_entry (p->lwtp_list.next, - lwt_page_t, lwtp_list); - } - } - - if (enable) { - lwt_enabled = 1; - mb(); - LWT_EVENT(0,0,0,0); - } - - return (0); -} - -int -lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, - void *user_ptr, int user_size) -{ - const int events_per_page = CFS_PAGE_SIZE / sizeof(lwt_event_t); - const int bytes_per_page = events_per_page * sizeof(lwt_event_t); - lwt_page_t *p; - int i; - int j; - - if (!capable(CAP_SYS_ADMIN)) - return (-EPERM); - - *ncpu = num_online_cpus(); - *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page; - *now = get_cycles(); - - if (user_ptr == NULL) - return (0); - - for (i = 0; i < num_online_cpus(); i++) { - p = lwt_cpus[i].lwtc_current_page; - - if (p == NULL) - return (-ENODATA); - - for (j = 0; j < lwt_pages_per_cpu; j++) { - if (copy_to_user(user_ptr, p->lwtp_events, - bytes_per_page)) - return (-EFAULT); - - user_ptr = ((char *)user_ptr) + bytes_per_page; - p = list_entry(p->lwtp_list.next, - lwt_page_t, lwtp_list); - - } - } - - return (0); -} - -int -lwt_init () -{ - int i; - int j; - - for (i = 0; i < num_online_cpus(); i++) - if (lwt_cpus[i].lwtc_current_page != NULL) - return (-EALREADY); - - LASSERT (!lwt_enabled); - - /* NULL pointers, zero scalars */ - memset (lwt_cpus, 0, sizeof (lwt_cpus)); - lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * 
CFS_PAGE_SIZE); - - for (i = 0; i < num_online_cpus(); i++) - for (j = 0; j < lwt_pages_per_cpu; j++) { - struct page *page = alloc_page (GFP_KERNEL); - lwt_page_t *lwtp; - - if (page == NULL) { - CERROR ("Can't allocate page\n"); - lwt_fini (); - return (-ENOMEM); - } - - LIBCFS_ALLOC(lwtp, sizeof (*lwtp)); - if (lwtp == NULL) { - CERROR ("Can't allocate lwtp\n"); - __free_page(page); - lwt_fini (); - return (-ENOMEM); - } - - lwtp->lwtp_page = page; - lwtp->lwtp_events = page_address(page); - memset (lwtp->lwtp_events, 0, CFS_PAGE_SIZE); - - if (j == 0) { - INIT_LIST_HEAD (&lwtp->lwtp_list); - lwt_cpus[i].lwtc_current_page = lwtp; - } else { - list_add (&lwtp->lwtp_list, - &lwt_cpus[i].lwtc_current_page->lwtp_list); - } - } - - lwt_enabled = 1; - mb(); - - LWT_EVENT(0,0,0,0); - - return (0); -} - -void -lwt_fini () -{ - int i; - - lwt_control(0, 0); - - for (i = 0; i < num_online_cpus(); i++) - while (lwt_cpus[i].lwtc_current_page != NULL) { - lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page; - - if (list_empty (&lwtp->lwtp_list)) { - lwt_cpus[i].lwtc_current_page = NULL; - } else { - lwt_cpus[i].lwtc_current_page = - list_entry (lwtp->lwtp_list.next, - lwt_page_t, lwtp_list); - - list_del (&lwtp->lwtp_list); - } - - __free_page (lwtp->lwtp_page); - LIBCFS_FREE (lwtp, sizeof (*lwtp)); - } -} - -EXPORT_SYMBOL(lwt_enabled); -EXPORT_SYMBOL(lwt_cpus); - -EXPORT_SYMBOL(lwt_init); -EXPORT_SYMBOL(lwt_fini); -EXPORT_SYMBOL(lwt_lookup_string); -EXPORT_SYMBOL(lwt_control); -EXPORT_SYMBOL(lwt_snapshot); -#endif diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c deleted file mode 100644 index 5e273cbc0adfbe94fc953b769d075df1ee64e342..0000000000000000000000000000000000000000 --- a/lnet/libcfs/module.c +++ /dev/null @@ -1,423 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> -#include <lnet/lnet.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -void -kportal_memhog_free (struct libcfs_device_userstate *ldu) -{ - cfs_page_t **level0p = &ldu->ldu_memhog_root_page; - cfs_page_t **level1p; - cfs_page_t **level2p; - int count1; - int count2; - - if (*level0p != NULL) { - - level1p = (cfs_page_t **)cfs_page_address(*level0p); - count1 = 0; - - while (count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && - *level1p != NULL) { - - level2p = (cfs_page_t **)cfs_page_address(*level1p); - count2 = 0; - - while (count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && - *level2p != NULL) { - - cfs_free_page(*level2p); - ldu->ldu_memhog_pages--; - level2p++; - count2++; - } - - cfs_free_page(*level1p); - ldu->ldu_memhog_pages--; - level1p++; - count1++; - } - - cfs_free_page(*level0p); - ldu->ldu_memhog_pages--; - - *level0p = NULL; - } - - LASSERT (ldu->ldu_memhog_pages == 0); -} - -int -kportal_memhog_alloc (struct libcfs_device_userstate *ldu, int npages, int flags) -{ - cfs_page_t **level0p; - cfs_page_t **level1p; - cfs_page_t **level2p; - int count1; - int count2; - - LASSERT (ldu->ldu_memhog_pages == 0); - LASSERT (ldu->ldu_memhog_root_page == NULL); - - if (npages < 0) - return -EINVAL; - - if (npages == 0) - 
return 0; - - level0p = &ldu->ldu_memhog_root_page; - *level0p = cfs_alloc_page(flags); - if (*level0p == NULL) - return -ENOMEM; - ldu->ldu_memhog_pages++; - - level1p = (cfs_page_t **)cfs_page_address(*level0p); - count1 = 0; - memset(level1p, 0, CFS_PAGE_SIZE); - - while (ldu->ldu_memhog_pages < npages && - count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) { - - if (cfs_signal_pending()) - return (-EINTR); - - *level1p = cfs_alloc_page(flags); - if (*level1p == NULL) - return -ENOMEM; - ldu->ldu_memhog_pages++; - - level2p = (cfs_page_t **)cfs_page_address(*level1p); - count2 = 0; - memset(level2p, 0, CFS_PAGE_SIZE); - - while (ldu->ldu_memhog_pages < npages && - count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) { - - if (cfs_signal_pending()) - return (-EINTR); - - *level2p = cfs_alloc_page(flags); - if (*level2p == NULL) - return (-ENOMEM); - ldu->ldu_memhog_pages++; - - level2p++; - count2++; - } - - level1p++; - count1++; - } - - return 0; -} - -/* called when opening /dev/device */ -static int libcfs_psdev_open(unsigned long flags, void *args) -{ - struct libcfs_device_userstate *ldu; - ENTRY; - - PORTAL_MODULE_USE; - - LIBCFS_ALLOC(ldu, sizeof(*ldu)); - if (ldu != NULL) { - ldu->ldu_memhog_pages = 0; - ldu->ldu_memhog_root_page = NULL; - } - *(struct libcfs_device_userstate **)args = ldu; - - RETURN(0); -} - -/* called when closing /dev/device */ -static int libcfs_psdev_release(unsigned long flags, void *args) -{ - struct libcfs_device_userstate *ldu; - ENTRY; - - ldu = (struct libcfs_device_userstate *)args; - if (ldu != NULL) { - kportal_memhog_free(ldu); - LIBCFS_FREE(ldu, sizeof(*ldu)); - } - - PORTAL_MODULE_UNUSE; - RETURN(0); -} - -static struct rw_semaphore ioctl_list_sem; -static struct list_head ioctl_list; - -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if (!list_empty(&hand->item)) - rc = -EBUSY; - else - list_add_tail(&hand->item, &ioctl_list); - up_write(&ioctl_list_sem); - - return 
rc; -} -EXPORT_SYMBOL(libcfs_register_ioctl); - -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if (list_empty(&hand->item)) - rc = -ENOENT; - else - list_del_init(&hand->item); - up_write(&ioctl_list_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_deregister_ioctl); - -static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg) -{ - char buf[1024]; - int err = -EINVAL; - struct libcfs_ioctl_data *data; - ENTRY; - - /* 'cmd' and permissions get checked in our arch-specific caller */ - - if (libcfs_ioctl_getdata(buf, buf + 800, (void *)arg)) { - CERROR("PORTALS ioctl: data error\n"); - RETURN(-EINVAL); - } - data = (struct libcfs_ioctl_data *)buf; - - switch (cmd) { - case IOC_LIBCFS_CLEAR_DEBUG: - libcfs_debug_clear_buffer(); - RETURN(0); - /* - * case IOC_LIBCFS_PANIC: - * Handled in arch/cfs_module.c - */ - case IOC_LIBCFS_MARK_DEBUG: - if (data->ioc_inlbuf1 == NULL || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') - RETURN(-EINVAL); - libcfs_debug_mark_buffer(data->ioc_inlbuf1); - RETURN(0); -#if LWT_SUPPORT - case IOC_LIBCFS_LWT_CONTROL: - err = lwt_control ((data->ioc_flags & 1) != 0, - (data->ioc_flags & 2) != 0); - break; - - case IOC_LIBCFS_LWT_SNAPSHOT: { - cycles_t now; - int ncpu; - int total_size; - - err = lwt_snapshot (&now, &ncpu, &total_size, - data->ioc_pbuf1, data->ioc_plen1); - data->ioc_u64[0] = now; - data->ioc_u32[0] = ncpu; - data->ioc_u32[1] = total_size; - - /* Hedge against broken user/kernel typedefs (e.g. 
cycles_t) */ - data->ioc_u32[2] = sizeof(lwt_event_t); - data->ioc_u32[3] = offsetof(lwt_event_t, lwte_where); - - if (err == 0 && - libcfs_ioctl_popdata(arg, data, sizeof (*data))) - err = -EFAULT; - break; - } - - case IOC_LIBCFS_LWT_LOOKUP_STRING: - err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1, - data->ioc_pbuf2, data->ioc_plen2); - if (err == 0 && - libcfs_ioctl_popdata(arg, data, sizeof (*data))) - err = -EFAULT; - break; -#endif - case IOC_LIBCFS_MEMHOG: - if (pfile->private_data == NULL) { - err = -EINVAL; - } else { - kportal_memhog_free(pfile->private_data); - /* XXX The ioc_flags is not GFP flags now, need to be fixed */ - err = kportal_memhog_alloc(pfile->private_data, - data->ioc_count, - data->ioc_flags); - if (err != 0) - kportal_memhog_free(pfile->private_data); - } - break; - - case IOC_LIBCFS_PING_TEST: { - extern void (kping_client)(struct libcfs_ioctl_data *); - void (*ping)(struct libcfs_ioctl_data *); - - CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n", - data->ioc_count, libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(data->ioc_nid)); - ping = PORTAL_SYMBOL_GET(kping_client); - if (!ping) - CERROR("PORTAL_SYMBOL_GET failed\n"); - else { - ping(data); - PORTAL_SYMBOL_PUT(kping_client); - } - RETURN(0); - } - - default: { - struct libcfs_ioctl_handler *hand; - err = -EINVAL; - down_read(&ioctl_list_sem); - list_for_each_entry(hand, &ioctl_list, item) { - err = hand->handle_ioctl(cmd, data); - if (err != -EINVAL) { - if (err == 0) - err = libcfs_ioctl_popdata(arg, - data, sizeof (*data)); - break; - } - } - up_read(&ioctl_list_sem); - break; - } - } - - RETURN(err); -} - -struct cfs_psdev_ops libcfs_psdev_ops = { - libcfs_psdev_open, - libcfs_psdev_release, - NULL, - NULL, - libcfs_ioctl -}; - -extern int insert_proc(void); -extern void remove_proc(void); -MODULE_AUTHOR("Peter J. 
Braam <braam@clusterfs.com>"); -MODULE_DESCRIPTION("Portals v3.1"); -MODULE_LICENSE("GPL"); - -extern cfs_psdev_t libcfs_dev; -extern struct rw_semaphore tracefile_sem; -extern struct semaphore trace_thread_sem; - -extern void libcfs_init_nidstrings(void); -extern int libcfs_arch_init(void); -extern void libcfs_arch_cleanup(void); - -static int init_libcfs_module(void) -{ - int rc; - - libcfs_arch_init(); - libcfs_init_nidstrings(); - init_rwsem(&tracefile_sem); - init_mutex(&trace_thread_sem); - init_rwsem(&ioctl_list_sem); - CFS_INIT_LIST_HEAD(&ioctl_list); - - rc = libcfs_debug_init(5 * 1024 * 1024); - if (rc < 0) { - printk(KERN_ERR "LustreError: libcfs_debug_init: %d\n", rc); - return (rc); - } - -#if LWT_SUPPORT - rc = lwt_init(); - if (rc != 0) { - CERROR("lwt_init: error %d\n", rc); - goto cleanup_debug; - } -#endif - rc = cfs_psdev_register(&libcfs_dev); - if (rc) { - CERROR("misc_register: error %d\n", rc); - goto cleanup_lwt; - } - - rc = insert_proc(); - if (rc) { - CERROR("insert_proc: error %d\n", rc); - goto cleanup_deregister; - } - - CDEBUG (D_OTHER, "portals setup OK\n"); - return (0); - - cleanup_deregister: - cfs_psdev_deregister(&libcfs_dev); - cleanup_lwt: -#if LWT_SUPPORT - lwt_fini(); - cleanup_debug: -#endif - libcfs_debug_cleanup(); - return rc; -} - -static void exit_libcfs_module(void) -{ - int rc; - - remove_proc(); - - CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - rc = cfs_psdev_deregister(&libcfs_dev); - if (rc) - CERROR("misc_deregister error %d\n", rc); - -#if LWT_SUPPORT - lwt_fini(); -#endif - - if (atomic_read(&libcfs_kmemory) != 0) - CERROR("Portals memory leaked: %d bytes\n", - atomic_read(&libcfs_kmemory)); - - rc = libcfs_debug_cleanup(); - if (rc) - printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n", rc); - libcfs_arch_cleanup(); -} - -cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module); diff --git a/lnet/libcfs/nidstrings.c b/lnet/libcfs/nidstrings.c 
deleted file mode 100644 index 5f17f5a2851f8fd3ce7dc34f7c0f43b101943e3f..0000000000000000000000000000000000000000 --- a/lnet/libcfs/nidstrings.c +++ /dev/null @@ -1,540 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lnet.h> -#include <libcfs/kp30.h> -#ifndef __KERNEL__ -#ifdef HAVE_GETHOSTBYNAME -# include <netdb.h> -#endif -#endif - -/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids - * consistent in all conversion functions. Some code fragments are copied - * around for the sake of clarity... - */ - -/* CAVEAT EMPTOR! Racey temporary buffer allocation! - * Choose the number of nidstrings to support the MAXIMUM expected number of - * concurrent users. If there are more, the returned string will be volatile. - * NB this number must allow for a process to be descheduled for a timeslice - * between getting its string and using it. 
- */ - -#define LNET_NIDSTR_COUNT 128 /* # of nidstrings */ -#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ - -static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE]; -static int libcfs_nidstring_idx = 0; - -#ifdef __KERNEL__ -static spinlock_t libcfs_nidstring_lock; - -void libcfs_init_nidstrings (void) -{ - spin_lock_init(&libcfs_nidstring_lock); -} - -# define NIDSTR_LOCK(f) spin_lock_irqsave(&libcfs_nidstring_lock, f) -# define NIDSTR_UNLOCK(f) spin_unlock_irqrestore(&libcfs_nidstring_lock, f) -#else -# define NIDSTR_LOCK(f) (f=0) /* avoid unused var warnings */ -# define NIDSTR_UNLOCK(f) (f=0) -#endif - -static char * -libcfs_next_nidstring (void) -{ - char *str; - unsigned long flags; - - NIDSTR_LOCK(flags); - - str = libcfs_nidstrings[libcfs_nidstring_idx++]; - if (libcfs_nidstring_idx == - sizeof(libcfs_nidstrings)/sizeof(libcfs_nidstrings[0])) - libcfs_nidstring_idx = 0; - - NIDSTR_UNLOCK(flags); - return str; -} - -static int libcfs_lo_str2addr(const char *str, int nob, __u32 *addr); -static void libcfs_ip_addr2str(__u32 addr, char *str); -static int libcfs_ip_str2addr(const char *str, int nob, __u32 *addr); -static void libcfs_decnum_addr2str(__u32 addr, char *str); -static void libcfs_hexnum_addr2str(__u32 addr, char *str); -static int libcfs_num_str2addr(const char *str, int nob, __u32 *addr); - -struct netstrfns { - int nf_type; - char *nf_name; - char *nf_modname; - void (*nf_addr2str)(__u32 addr, char *str); - int (*nf_str2addr)(const char *str, int nob, __u32 *addr); -}; - -static struct netstrfns libcfs_netstrfns[] = { - {/* .nf_type */ LOLND, - /* .nf_name */ "lo", - /* .nf_modname */ "klolnd", - /* .nf_addr2str */ libcfs_decnum_addr2str, - /* .nf_str2addr */ libcfs_lo_str2addr}, - {/* .nf_type */ SOCKLND, - /* .nf_name */ "tcp", - /* .nf_modname */ "ksocklnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ O2IBLND, - /* .nf_name */ "o2ib", - /* 
.nf_modname */ "ko2iblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ CIBLND, - /* .nf_name */ "cib", - /* .nf_modname */ "kciblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ OPENIBLND, - /* .nf_name */ "openib", - /* .nf_modname */ "kopeniblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ IIBLND, - /* .nf_name */ "iib", - /* .nf_modname */ "kiiblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ VIBLND, - /* .nf_name */ "vib", - /* .nf_modname */ "kviblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ RALND, - /* .nf_name */ "ra", - /* .nf_modname */ "kralnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ QSWLND, - /* .nf_name */ "elan", - /* .nf_modname */ "kqswlnd", - /* .nf_addr2str */ libcfs_decnum_addr2str, - /* .nf_str2addr */ libcfs_num_str2addr}, - {/* .nf_type */ GMLND, - /* .nf_name */ "gm", - /* .nf_modname */ "kgmlnd", - /* .nf_addr2str */ libcfs_hexnum_addr2str, - /* .nf_str2addr */ libcfs_num_str2addr}, - {/* .nf_type */ MXLND, - /* .nf_name */ "mx", - /* .nf_modname */ "kmxlnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ PTLLND, - /* .nf_name */ "ptl", - /* .nf_modname */ "kptllnd", - /* .nf_addr2str */ libcfs_decnum_addr2str, - /* .nf_str2addr */ libcfs_num_str2addr}, - /* placeholder for net0 alias. 
It MUST BE THE LAST ENTRY */ - {/* .nf_type */ -1}, -}; - -const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]); - -int -libcfs_lo_str2addr(const char *str, int nob, __u32 *addr) -{ - *addr = 0; - return 1; -} - -void -libcfs_ip_addr2str(__u32 addr, char *str) -{ -#if 0 /* never lookup */ -#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME - __u32 netip = htonl(addr); - struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET); - - if (he != NULL) { - snprintf(str, LNET_NIDSTR_SIZE, "%s", he->h_name); - return; - } -#endif -#endif - snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u", - (addr >> 24) & 0xff, (addr >> 16) & 0xff, - (addr >> 8) & 0xff, addr & 0xff); -} - -/* CAVEAT EMPTOR XscanfX - * I use "%n" at the end of a sscanf format to detect trailing junk. However - * sscanf may return immediately if it sees the terminating '0' in a string, so - * I initialise the %n variable to the expected length. If sscanf sets it; - * fine, if it doesn't, then the scan ended at the end of the string, which is - * fine too :) */ - -int -libcfs_ip_str2addr(const char *str, int nob, __u32 *addr) -{ - int a; - int b; - int c; - int d; - int n = nob; /* XscanfX */ - - /* numeric IP? */ - if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && - n == nob && - (a & ~0xff) == 0 && (b & ~0xff) == 0 && - (c & ~0xff) == 0 && (d & ~0xff) == 0) { - *addr = ((a<<24)|(b<<16)|(c<<8)|d); - return 1; - } - -#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME - /* known hostname? 
*/ - if (('a' <= str[0] && str[0] <= 'z') || - ('A' <= str[0] && str[0] <= 'Z')) { - char *tmp; - - LIBCFS_ALLOC(tmp, nob + 1); - if (tmp != NULL) { - struct hostent *he; - - memcpy(tmp, str, nob); - tmp[nob] = 0; - - he = gethostbyname(tmp); - - LIBCFS_FREE(tmp, nob); - - if (he != NULL) { - __u32 ip = *(__u32 *)he->h_addr; - - *addr = ntohl(ip); - return 1; - } - } - } -#endif - return 0; -} - -void -libcfs_decnum_addr2str(__u32 addr, char *str) -{ - snprintf(str, LNET_NIDSTR_SIZE, "%u", addr); -} - -void -libcfs_hexnum_addr2str(__u32 addr, char *str) -{ - snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr); -} - -int -libcfs_num_str2addr(const char *str, int nob, __u32 *addr) -{ - int n; - - n = nob; - if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) - return 1; - - return 0; -} - -struct netstrfns * -libcfs_lnd2netstrfns(int lnd) -{ - int i; - - if (lnd >= 0) - for (i = 0; i < libcfs_nnetstrfns; i++) - if (lnd == libcfs_netstrfns[i].nf_type) - return &libcfs_netstrfns[i]; - - return NULL; -} - -struct netstrfns * -libcfs_name2netstrfns(const char *name) -{ - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) - if (libcfs_netstrfns[i].nf_type >= 0 && - !strcmp(libcfs_netstrfns[i].nf_name, name)) - return &libcfs_netstrfns[i]; - - return NULL; -} - -int -libcfs_isknown_lnd(int type) -{ - return libcfs_lnd2netstrfns(type) != NULL; -} - -char * -libcfs_lnd2modname(int lnd) -{ - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - - return (nf == NULL) ? 
NULL : nf->nf_modname; -} - -char * -libcfs_lnd2str(int lnd) -{ - char *str; - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - - if (nf != NULL) - return nf->nf_name; - - str = libcfs_next_nidstring(); - snprintf(str, LNET_NIDSTR_SIZE, "?%u?", lnd); - return str; -} - -int -libcfs_str2lnd(const char *str) -{ - struct netstrfns *nf = libcfs_name2netstrfns(str); - - if (nf != NULL) - return nf->nf_type; - - return -1; -} - -char * -libcfs_net2str(__u32 net) -{ - int lnd = LNET_NETTYP(net); - int num = LNET_NETNUM(net); - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - char *str = libcfs_next_nidstring(); - - if (nf == NULL) - snprintf(str, LNET_NIDSTR_SIZE, "<%u:%u>", lnd, num); - else if (num == 0) - snprintf(str, LNET_NIDSTR_SIZE, "%s", nf->nf_name); - else - snprintf(str, LNET_NIDSTR_SIZE, "%s%u", nf->nf_name, num); - - return str; -} - -char * -libcfs_nid2str(lnet_nid_t nid) -{ - __u32 addr = LNET_NIDADDR(nid); - __u32 net = LNET_NIDNET(nid); - int lnd = LNET_NETTYP(net); - int nnum = LNET_NETNUM(net); - struct netstrfns *nf; - char *str; - int nob; - - if (nid == LNET_NID_ANY) - return "LNET_NID_ANY"; - - nf = libcfs_lnd2netstrfns(lnd); - str = libcfs_next_nidstring(); - - if (nf == NULL) - snprintf(str, LNET_NIDSTR_SIZE, "%x@<%u:%u>", addr, lnd, nnum); - else { - nf->nf_addr2str(addr, str); - nob = strlen(str); - if (nnum == 0) - snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s", - nf->nf_name); - else - snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s%u", - nf->nf_name, nnum); - } - - return str; -} - -static struct netstrfns * -libcfs_str2net_internal(const char *str, __u32 *net) -{ - struct netstrfns *nf; - int nob; - int netnum; - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) { - nf = &libcfs_netstrfns[i]; - if (nf->nf_type >= 0 && - !strncmp(str, nf->nf_name, strlen(nf->nf_name))) - break; - } - - if (i == libcfs_nnetstrfns) - return NULL; - - nob = strlen(nf->nf_name); - - if (strlen(str) == (unsigned int)nob) { - netnum = 0; - } else { - if 
(nf->nf_type == LOLND) /* net number not allowed */ - return NULL; - - str += nob; - i = strlen(str); - if (sscanf(str, "%u%n", &netnum, &i) < 1 || - i != (int)strlen(str)) - return NULL; - } - - *net = LNET_MKNET(nf->nf_type, netnum); - return nf; -} - -__u32 -libcfs_str2net(const char *str) -{ - __u32 net; - - if (libcfs_str2net_internal(str, &net) != NULL) - return net; - - return LNET_NIDNET(LNET_NID_ANY); -} - -lnet_nid_t -libcfs_str2nid(const char *str) -{ - const char *sep = strchr(str, '@'); - struct netstrfns *nf; - __u32 net; - __u32 addr; - - if (sep != NULL) { - nf = libcfs_str2net_internal(sep + 1, &net); - if (nf == NULL) - return LNET_NID_ANY; - } else { - sep = str + strlen(str); - net = LNET_MKNET(SOCKLND, 0); - nf = libcfs_lnd2netstrfns(SOCKLND); - LASSERT (nf != NULL); - } - - if (!nf->nf_str2addr(str, sep - str, &addr)) - return LNET_NID_ANY; - - return LNET_MKNID(net, addr); -} - -char * -libcfs_id2str(lnet_process_id_t id) -{ - char *str = libcfs_next_nidstring(); - - if (id.pid == LNET_PID_ANY) { - snprintf(str, LNET_NIDSTR_SIZE, - "LNET_PID_ANY-%s", libcfs_nid2str(id.nid)); - return str; - } - - snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s", - ((id.pid & LNET_PID_USERFLAG) != 0) ? "U" : "", - (id.pid & ~LNET_PID_USERFLAG), libcfs_nid2str(id.nid)); - return str; -} - -int -libcfs_str2anynid(lnet_nid_t *nidp, const char *str) -{ - if (!strcmp(str, "*")) { - *nidp = LNET_NID_ANY; - return 1; - } - - *nidp = libcfs_str2nid(str); - return *nidp != LNET_NID_ANY; -} - -#ifdef __KERNEL__ -void -libcfs_setnet0alias(int lnd) -{ - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - struct netstrfns *nf0 = &libcfs_netstrfns[libcfs_nnetstrfns - 1]; - - /* Ghastly hack to allow LNET to inter-operate with portals. 
- * NET type 0 becomes an alias for whatever local network we have, and - * this assignment here means we can parse and print its NIDs */ - - LASSERT (nf != NULL); - LASSERT (nf0->nf_type < 0); - - nf0->nf_name = "zero";//nf->nf_name; - nf0->nf_modname = nf->nf_modname; - nf0->nf_addr2str = nf->nf_addr2str; - nf0->nf_str2addr = nf->nf_str2addr; - mb(); - nf0->nf_type = 0; -} - -EXPORT_SYMBOL(libcfs_isknown_lnd); -EXPORT_SYMBOL(libcfs_lnd2modname); -EXPORT_SYMBOL(libcfs_lnd2str); -EXPORT_SYMBOL(libcfs_str2lnd); -EXPORT_SYMBOL(libcfs_net2str); -EXPORT_SYMBOL(libcfs_nid2str); -EXPORT_SYMBOL(libcfs_str2net); -EXPORT_SYMBOL(libcfs_str2nid); -EXPORT_SYMBOL(libcfs_id2str); -EXPORT_SYMBOL(libcfs_str2anynid); -EXPORT_SYMBOL(libcfs_setnet0alias); -#else /* __KERNEL__ */ -void -libcfs_setnet0alias(int lnd) -{ - LCONSOLE_ERROR_MSG(0x125, "Liblustre cannot interoperate with old " - "Portals.\nportals_compatibility must be set to " - "'none'.\n"); -} -#endif diff --git a/lnet/libcfs/tracefile.c b/lnet/libcfs/tracefile.c deleted file mode 100644 index 4a5cf5245d023d2ead23c3f17c36ec28253fb83d..0000000000000000000000000000000000000000 --- a/lnet/libcfs/tracefile.c +++ /dev/null @@ -1,1114 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Zach Brown <zab@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE -#include "tracefile.h" - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> - -/* XXX move things up to the top, comment */ -union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; - -char tracefile[TRACEFILE_NAME_SIZE]; -long long tracefile_size = TRACEFILE_SIZE; -static struct tracefiled_ctl trace_tctl; -struct semaphore trace_thread_sem; -static int thread_running = 0; - -atomic_t tage_allocated = ATOMIC_INIT(0); - -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct trace_cpu_data *tcd); - -static inline struct trace_page *tage_from_list(struct list_head *list) -{ - return list_entry(list, struct trace_page, linkage); -} - -static struct trace_page *tage_alloc(int gfp) -{ - cfs_page_t *page; - struct trace_page *tage; - - /* - * Don't spam console with allocation failures: they will be reported - * by upper layer anyway. 
- */ - gfp |= CFS_ALLOC_NOWARN; - page = cfs_alloc_page(gfp); - if (page == NULL) - return NULL; - - tage = cfs_alloc(sizeof(*tage), gfp); - if (tage == NULL) { - cfs_free_page(page); - return NULL; - } - - tage->page = page; - atomic_inc(&tage_allocated); - return tage; -} - -static void tage_free(struct trace_page *tage) -{ - __LASSERT(tage != NULL); - __LASSERT(tage->page != NULL); - - cfs_free_page(tage->page); - cfs_free(tage); - atomic_dec(&tage_allocated); -} - -static void tage_to_tail(struct trace_page *tage, struct list_head *queue) -{ - __LASSERT(tage != NULL); - __LASSERT(queue != NULL); - - list_move_tail(&tage->linkage, queue); -} - -int trace_refill_stock(struct trace_cpu_data *tcd, int gfp, - struct list_head *stock) -{ - int i; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) { - struct trace_page *tage; - - tage = tage_alloc(gfp); - if (tage == NULL) - break; - list_add_tail(&tage->linkage, stock); - } - return i; -} - -/* return a page that has 'len' bytes left at the end */ -static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd, - unsigned long len) -{ - struct trace_page *tage; - - if (tcd->tcd_cur_pages > 0) { - __LASSERT(!list_empty(&tcd->tcd_pages)); - tage = tage_from_list(tcd->tcd_pages.prev); - if (tage->used + len <= CFS_PAGE_SIZE) - return tage; - } - - if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { - if (tcd->tcd_cur_stock_pages > 0) { - tage = tage_from_list(tcd->tcd_stock_pages.prev); - -- tcd->tcd_cur_stock_pages; - list_del_init(&tage->linkage); - } else { - tage = tage_alloc(CFS_ALLOC_ATOMIC); - if (tage == NULL) { - printk(KERN_WARNING - "failure to allocate a tage (%ld)\n", - tcd->tcd_cur_pages); - return NULL; - } - } - - tage->used = 0; - tage->cpu = smp_processor_id(); - tage->type = tcd->tcd_type; - list_add_tail(&tage->linkage, &tcd->tcd_pages); - 
tcd->tcd_cur_pages++; - - if (tcd->tcd_cur_pages > 8 && thread_running) { - struct tracefiled_ctl *tctl = &trace_tctl; - /* - * wake up tracefiled to process some pages. - */ - cfs_waitq_signal(&tctl->tctl_waitq); - } - return tage; - } - return NULL; -} - -static void tcd_shrink(struct trace_cpu_data *tcd) -{ - int pgcount = tcd->tcd_cur_pages / 10; - struct page_collection pc; - struct trace_page *tage; - struct trace_page *tmp; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - printk(KERN_WARNING "debug daemon buffer overflowed; discarding" - " 10%% of pages (%d of %ld)\n", pgcount + 1, tcd->tcd_cur_pages); - - CFS_INIT_LIST_HEAD(&pc.pc_pages); - spin_lock_init(&pc.pc_lock); - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - if (pgcount-- == 0) - break; - - list_move_tail(&tage->linkage, &pc.pc_pages); - tcd->tcd_cur_pages--; - } - put_pages_on_tcd_daemon_list(&pc, tcd); -} - -/* return a page that has 'len' bytes left at the end */ -static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd, - unsigned long len) -{ - struct trace_page *tage; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - if (len > CFS_PAGE_SIZE) { - printk(KERN_ERR - "cowardly refusing to write %lu bytes in a page\n", len); - return NULL; - } - - tage = trace_get_tage_try(tcd, len); - if (tage != NULL) - return tage; - if (thread_running) - tcd_shrink(tcd); - if (tcd->tcd_cur_pages > 0) { - tage = tage_from_list(tcd->tcd_pages.next); - tage->used = 0; - tage_to_tail(tage, &tcd->tcd_pages); - } - return tage; -} - -int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask, - const char *file, const char *fn, const int line, - const char *format1, va_list args, - const char *format2, ...) 
-{ - struct trace_cpu_data *tcd = NULL; - struct ptldebug_header header; - struct trace_page *tage; - /* string_buf is used only if tcd != NULL, and is always set then */ - char *string_buf = NULL; - char *debug_buf; - int known_size; - int needed = 85; /* average message length */ - int max_nob; - va_list ap; - int depth; - int i; - int remain; - - if (strchr(file, '/')) - file = strrchr(file, '/') + 1; - - - set_ptldebug_header(&header, subsys, mask, line, CDEBUG_STACK()); - - tcd = trace_get_tcd(); - if (tcd == NULL) /* arch may not log in IRQ context */ - goto console; - - if (tcd->tcd_shutting_down) { - trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - depth = __current_nesting_level(); - known_size = strlen(file) + 1 + depth; - if (fn) - known_size += strlen(fn) + 1; - - if (libcfs_debug_binary) - known_size += sizeof(header); - - /*/ - * '2' used because vsnprintf return real size required for output - * _without_ terminating NULL. - * if needed is to small for this format. - */ - for (i=0;i<2;i++) { - tage = trace_get_tage(tcd, needed + known_size + 1); - if (tage == NULL) { - if (needed + known_size > CFS_PAGE_SIZE) - mask |= D_ERROR; - - trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - string_buf = (char *)cfs_page_address(tage->page)+tage->used+known_size; - - max_nob = CFS_PAGE_SIZE - tage->used - known_size; - if (max_nob <= 0) { - printk(KERN_EMERG "negative max_nob: %i\n", max_nob); - mask |= D_ERROR; - trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, max_nob, format1, ap); - va_end(ap); - } - - - if (format2) { - remain = max_nob - needed; - if (remain < 0) - remain = 0; - - va_start(ap, format2); - needed += vsnprintf(string_buf+needed, remain, format2, ap); - va_end(ap); - } - - if (needed < max_nob) /* well. printing ok.. 
*/ - break; - } - - if (*(string_buf+needed-1) != '\n') - printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", - file, line, fn); - - header.ph_len = known_size + needed; - debug_buf = (char *)cfs_page_address(tage->page) + tage->used; - - if (libcfs_debug_binary) { - memcpy(debug_buf, &header, sizeof(header)); - tage->used += sizeof(header); - debug_buf += sizeof(header); - } - - /* indent message according to the nesting level */ - while (depth-- > 0) { - *(debug_buf++) = '.'; - ++ tage->used; - } - - strcpy(debug_buf, file); - tage->used += strlen(file) + 1; - debug_buf += strlen(file) + 1; - - if (fn) { - strcpy(debug_buf, fn); - tage->used += strlen(fn) + 1; - debug_buf += strlen(fn) + 1; - } - - __LASSERT(debug_buf == string_buf); - - tage->used += needed; - __LASSERT (tage->used <= CFS_PAGE_SIZE); - -console: - if ((mask & libcfs_printk) == 0) { - /* no console output requested */ - if (tcd != NULL) - trace_put_tcd(tcd); - return 1; - } - - if (cdls != NULL) { - if (libcfs_console_ratelimit && - cdls->cdls_next != 0 && /* not first time ever */ - !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { - /* skipping a console message */ - cdls->cdls_count++; - if (tcd != NULL) - trace_put_tcd(tcd); - return 1; - } - - if (cfs_time_after(cfs_time_current(), cdls->cdls_next + - libcfs_console_max_delay - + cfs_time_seconds(10))) { - /* last timeout was a long time ago */ - cdls->cdls_delay /= libcfs_console_backoff * 4; - } else { - cdls->cdls_delay *= libcfs_console_backoff; - - if (cdls->cdls_delay < libcfs_console_min_delay) - cdls->cdls_delay = libcfs_console_min_delay; - else if (cdls->cdls_delay > libcfs_console_max_delay) - cdls->cdls_delay = libcfs_console_max_delay; - } - - /* ensure cdls_next is never zero after it's been seen */ - cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; - } - - if (tcd != NULL) { - print_to_console(&header, mask, string_buf, needed, file, fn); - trace_put_tcd(tcd); - } else { - string_buf = 
trace_get_console_buffer(); - - needed = 0; - if (format1 != NULL) { - va_copy(ap, args); - needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap); - va_end(ap); - } - if (format2 != NULL) { - remain = TRACE_CONSOLE_BUFFER_SIZE - needed; - if (remain > 0) { - va_start(ap, format2); - needed += vsnprintf(string_buf+needed, remain, format2, ap); - va_end(ap); - } - } - print_to_console(&header, mask, - string_buf, needed, file, fn); - - trace_put_console_buffer(string_buf); - } - - if (cdls != NULL && cdls->cdls_count != 0) { - string_buf = trace_get_console_buffer(); - - needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, - "Skipped %d previous similar message%s\n", - cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); - - print_to_console(&header, mask, - string_buf, needed, file, fn); - - trace_put_console_buffer(string_buf); - cdls->cdls_count = 0; - } - - return 0; -} -EXPORT_SYMBOL(libcfs_debug_vmsg2); - -void -libcfs_assertion_failed(const char *expr, const char *file, - const char *func, const int line) -{ - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, - "ASSERTION(%s) failed\n", expr); - LBUG(); -} -EXPORT_SYMBOL(libcfs_assertion_failed); - -void -trace_assertion_failed(const char *str, - const char *fn, const char *file, int line) -{ - struct ptldebug_header hdr; - - libcfs_panic_in_progress = 1; - libcfs_catastrophe = 1; - mb(); - - set_ptldebug_header(&hdr, DEBUG_SUBSYSTEM, D_EMERG, line, - CDEBUG_STACK()); - - print_to_console(&hdr, D_EMERG, str, strlen(str), file, fn); - - LIBCFS_PANIC("Lustre debug assertion failure\n"); - - /* not reached */ -} - -static void -panic_collect_pages(struct page_collection *pc) -{ - /* Do the collect_pages job on a single CPU: assumes that all other - * CPUs have been stopped during a panic. If this isn't true for some - * arch, this will have to be implemented separately in each arch. 
*/ - int i; - int j; - struct trace_cpu_data *tcd; - - CFS_INIT_LIST_HEAD(&pc->pc_pages); - - tcd_for_each(tcd, i, j) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } -} - -static void collect_pages_on_cpu(void *info) -{ - struct trace_cpu_data *tcd; - struct page_collection *pc = info; - int i; - - spin_lock(&pc->pc_lock); - tcd_for_each_type_lock(tcd, i) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } - spin_unlock(&pc->pc_lock); -} - -static void collect_pages(struct page_collection *pc) -{ - CFS_INIT_LIST_HEAD(&pc->pc_pages); - - if (libcfs_panic_in_progress) - panic_collect_pages(pc); - else - trace_call_on_all_cpus(collect_pages_on_cpu, pc); -} - -static void put_pages_back_on_cpu(void *info) -{ - struct page_collection *pc = info; - struct trace_cpu_data *tcd; - struct list_head *cur_head; - struct trace_page *tage; - struct trace_page *tmp; - int i; - - spin_lock(&pc->pc_lock); - tcd_for_each_type_lock(tcd, i) { - cur_head = tcd->tcd_pages.next; - - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != smp_processor_id() || tage->type != i) - continue; - - tage_to_tail(tage, cur_head); - tcd->tcd_cur_pages++; - } - } - spin_unlock(&pc->pc_lock); -} - -static void put_pages_back(struct page_collection *pc) -{ - if (!libcfs_panic_in_progress) - trace_call_on_all_cpus(put_pages_back_on_cpu, pc); -} - -/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that - * we have a good amount of data at all times for dumping during an LBUG, even - * if we have been steadily writing (and otherwise discarding) pages via the - * debug daemon. 
*/ -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct trace_cpu_data *tcd) -{ - struct trace_page *tage; - struct trace_page *tmp; - - spin_lock(&pc->pc_lock); - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != smp_processor_id() || - tage->type != tcd->tcd_type) - continue; - - tage_to_tail(tage, &tcd->tcd_daemon_pages); - tcd->tcd_cur_daemon_pages++; - - if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { - struct trace_page *victim; - - __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); - victim = tage_from_list(tcd->tcd_daemon_pages.next); - - __LASSERT_TAGE_INVARIANT(victim); - - list_del(&victim->linkage); - tage_free(victim); - tcd->tcd_cur_daemon_pages--; - } - } - spin_unlock(&pc->pc_lock); -} - -static void put_pages_on_daemon_list_on_cpu(void *info) -{ - struct trace_cpu_data *tcd; - int i; - - tcd_for_each_type_lock(tcd, i) - put_pages_on_tcd_daemon_list(info, tcd); -} - -static void put_pages_on_daemon_list(struct page_collection *pc) -{ - trace_call_on_all_cpus(put_pages_on_daemon_list_on_cpu, pc); -} - -void trace_debug_print(void) -{ - struct page_collection pc; - struct trace_page *tage; - struct trace_page *tmp; - - spin_lock_init(&pc.pc_lock); - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - char *p, *file, *fn; - cfs_page_t *page; - - __LASSERT_TAGE_INVARIANT(tage); - - page = tage->page; - p = cfs_page_address(page); - while (p < ((char *)cfs_page_address(page) + tage->used)) { - struct ptldebug_header *hdr; - int len; - hdr = (void *)p; - p += sizeof(*hdr); - file = p; - p += strlen(file) + 1; - fn = p; - p += strlen(fn) + 1; - len = hdr->ph_len - (p - (char *)hdr); - - print_to_console(hdr, D_EMERG, p, len, file, fn); - - p += len; - } - - list_del(&tage->linkage); - tage_free(tage); - } -} - -int tracefile_dump_all_pages(char *filename) -{ - struct page_collection pc; - 
cfs_file_t *filp; - struct trace_page *tage; - struct trace_page *tmp; - int rc; - - CFS_DECL_MMSPACE; - - tracefile_write_lock(); - - filp = cfs_filp_open(filename, - O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc); - if (!filp) { - if (rc != -EEXIST) - printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", - filename, rc); - goto out; - } - - spin_lock_init(&pc.pc_lock); - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) { - rc = 0; - goto close; - } - - /* ok, for now, just write the pages. in the future we'll be building - * iobufs with the pages and calling generic_direct_IO */ - CFS_MMSPACE_OPEN; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - rc = cfs_filp_write(filp, cfs_page_address(tage->page), - tage->used, cfs_filp_poff(filp)); - if (rc != (int)tage->used) { - printk(KERN_WARNING "wanted to write %u but wrote " - "%d\n", tage->used, rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - list_del(&tage->linkage); - tage_free(tage); - } - CFS_MMSPACE_CLOSE; - rc = cfs_filp_fsync(filp); - if (rc) - printk(KERN_ERR "sync returns %d\n", rc); - close: - cfs_filp_close(filp); - out: - tracefile_write_unlock(); - return rc; -} - -void trace_flush_pages(void) -{ - struct page_collection pc; - struct trace_page *tage; - struct trace_page *tmp; - - spin_lock_init(&pc.pc_lock); - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - tage_free(tage); - } -} - -int trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char *usr_buffer, int usr_buffer_nob) -{ - int nob; - - if (usr_buffer_nob > knl_buffer_nob) - return -EOVERFLOW; - - if (copy_from_user((void *)knl_buffer, - (void *)usr_buffer, usr_buffer_nob)) - return -EFAULT; - - nob = strnlen(knl_buffer, usr_buffer_nob); - while (nob-- >= 0) /* strip 
trailing whitespace */ - if (!isspace(knl_buffer[nob])) - break; - - if (nob < 0) /* empty string */ - return -EINVAL; - - if (nob == knl_buffer_nob) /* no space to terminate */ - return -EOVERFLOW; - - knl_buffer[nob + 1] = 0; /* terminate */ - return 0; -} - -int trace_copyout_string(char *usr_buffer, int usr_buffer_nob, - const char *knl_buffer, char *append) -{ - /* NB if 'append' != NULL, it's a single character to append to the - * copied out string - usually "\n", for /proc entries and "" (i.e. a - * terminating zero byte) for sysctl entries */ - int nob = strlen(knl_buffer); - - if (nob > usr_buffer_nob) - nob = usr_buffer_nob; - - if (copy_to_user(usr_buffer, knl_buffer, nob)) - return -EFAULT; - - if (append != NULL && nob < usr_buffer_nob) { - if (copy_to_user(usr_buffer + nob, append, 1)) - return -EFAULT; - - nob++; - } - - return nob; -} - -int trace_allocate_string_buffer(char **str, int nob) -{ - if (nob > 2 * CFS_PAGE_SIZE) /* string must be "sensible" */ - return -EINVAL; - - *str = cfs_alloc(nob, CFS_ALLOC_STD | CFS_ALLOC_ZERO); - if (*str == NULL) - return -ENOMEM; - - return 0; -} - -void trace_free_string_buffer(char *str, int nob) -{ - cfs_free(str); -} - -int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob) -{ - char *str; - int rc; - - rc = trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc != 0) - return rc; - - rc = trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (rc != 0) - goto out; - -#if !defined(__WINNT__) - if (str[0] != '/') { - rc = -EINVAL; - goto out; - } -#endif - rc = tracefile_dump_all_pages(str); -out: - trace_free_string_buffer(str, usr_str_nob + 1); - return rc; -} - -int trace_daemon_command(char *str) -{ - int rc = 0; - - tracefile_write_lock(); - - if (strcmp(str, "stop") == 0) { - trace_stop_thread(); - memset(tracefile, 0, sizeof(tracefile)); - - } else if (strncmp(str, "size=", 5) == 0) { - tracefile_size = simple_strtoul(str + 5, NULL, 0); - if (tracefile_size < 10 || 
tracefile_size > 20480) - tracefile_size = TRACEFILE_SIZE; - else - tracefile_size <<= 20; - - } else if (strlen(str) >= sizeof(tracefile)) { - rc = -ENAMETOOLONG; -#ifndef __WINNT__ - } else if (str[0] != '/') { - rc = -EINVAL; -#endif - } else { - strcpy(tracefile, str); - - printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " - "to %s (%lukB max)\n", tracefile, - (long)(tracefile_size >> 10)); - - trace_start_thread(); - } - - tracefile_write_unlock(); - return rc; -} - -int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob) -{ - char *str; - int rc; - - rc = trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc != 0) - return rc; - - rc = trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (rc == 0) - rc = trace_daemon_command(str); - - trace_free_string_buffer(str, usr_str_nob + 1); - return rc; -} - -int trace_set_debug_mb(int mb) -{ - int i; - int j; - int pages; - int limit = trace_max_debug_mb(); - struct trace_cpu_data *tcd; - - if (mb < num_possible_cpus()) - return -EINVAL; - - if (mb > limit) { - printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%dMB - limit is %d\n", mb, limit); - return -EINVAL; - } - - mb /= num_possible_cpus(); - pages = mb << (20 - CFS_PAGE_SHIFT); - - tracefile_write_lock(); - - tcd_for_each(tcd, i, j) - tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; - - tracefile_write_unlock(); - - return 0; -} - -int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob) -{ - char str[32]; - int rc; - - rc = trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob); - if (rc < 0) - return rc; - - return trace_set_debug_mb(simple_strtoul(str, NULL, 0)); -} - -int trace_get_debug_mb(void) -{ - int i; - int j; - struct trace_cpu_data *tcd; - int total_pages = 0; - - tracefile_read_lock(); - - tcd_for_each(tcd, i, j) - total_pages += tcd->tcd_max_pages; - - tracefile_read_unlock(); - - return (total_pages >> (20 - CFS_PAGE_SHIFT)) + 1; -} - -static int 
tracefiled(void *arg) -{ - struct page_collection pc; - struct tracefiled_ctl *tctl = arg; - struct trace_page *tage; - struct trace_page *tmp; - struct ptldebug_header *hdr; - cfs_file_t *filp; - int rc; - - CFS_DECL_MMSPACE; - - /* we're started late enough that we pick up init's fs context */ - /* this is so broken in uml? what on earth is going on? */ - cfs_daemonize("ktracefiled"); - - spin_lock_init(&pc.pc_lock); - complete(&tctl->tctl_start); - - while (1) { - cfs_waitlink_t __wait; - - cfs_waitlink_init(&__wait); - cfs_waitq_add(&tctl->tctl_waitq, &__wait); - set_current_state(TASK_INTERRUPTIBLE); - cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(1)); - cfs_waitq_del(&tctl->tctl_waitq, &__wait); - - if (atomic_read(&tctl->tctl_shutdown)) - break; - - pc.pc_want_daemon_pages = 0; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) - continue; - - filp = NULL; - tracefile_read_lock(); - if (tracefile[0] != 0) { - filp = cfs_filp_open(tracefile, - O_CREAT | O_RDWR | O_LARGEFILE, - 0600, &rc); - if (!(filp)) - printk(KERN_WARNING "couldn't open %s: %d\n", - tracefile, rc); - } - tracefile_read_unlock(); - if (filp == NULL) { - put_pages_on_daemon_list(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - continue; - } - - CFS_MMSPACE_OPEN; - - /* mark the first header, so we can sort in chunks */ - tage = tage_from_list(pc.pc_pages.next); - __LASSERT_TAGE_INVARIANT(tage); - - hdr = cfs_page_address(tage->page); - hdr->ph_flags |= PH_FLAG_FIRST_RECORD; - - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - static loff_t f_pos; - - __LASSERT_TAGE_INVARIANT(tage); - - if (f_pos >= (off_t)tracefile_size) - f_pos = 0; - else if (f_pos > cfs_filp_size(filp)) - f_pos = cfs_filp_size(filp); - - rc = cfs_filp_write(filp, cfs_page_address(tage->page), - tage->used, &f_pos); - if (rc != (int)tage->used) { - printk(KERN_WARNING "wanted to write %u but " - "wrote %d\n", tage->used, rc); - put_pages_back(&pc); - 
__LASSERT(list_empty(&pc.pc_pages)); - } - } - CFS_MMSPACE_CLOSE; - - cfs_filp_close(filp); - put_pages_on_daemon_list(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - } - complete(&tctl->tctl_stop); - return 0; -} - -int trace_start_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - int rc = 0; - - mutex_down(&trace_thread_sem); - if (thread_running) - goto out; - - init_completion(&tctl->tctl_start); - init_completion(&tctl->tctl_stop); - cfs_waitq_init(&tctl->tctl_waitq); - atomic_set(&tctl->tctl_shutdown, 0); - - if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) { - rc = -ECHILD; - goto out; - } - - wait_for_completion(&tctl->tctl_start); - thread_running = 1; -out: - mutex_up(&trace_thread_sem); - return rc; -} - -void trace_stop_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - - mutex_down(&trace_thread_sem); - if (thread_running) { - printk(KERN_INFO "Lustre: shutting down debug daemon thread...\n"); - atomic_set(&tctl->tctl_shutdown, 1); - wait_for_completion(&tctl->tctl_stop); - thread_running = 0; - } - mutex_up(&trace_thread_sem); -} - -int tracefile_init(int max_pages) -{ - struct trace_cpu_data *tcd; - int i; - int j; - int rc; - int factor; - - rc = tracefile_init_arch(); - if (rc != 0) - return rc; - - tcd_for_each(tcd, i, j) { - /* tcd_pages_factor is initialized int tracefile_init_arch. 
*/ - factor = tcd->tcd_pages_factor; - CFS_INIT_LIST_HEAD(&tcd->tcd_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages); - tcd->tcd_cur_pages = 0; - tcd->tcd_cur_stock_pages = 0; - tcd->tcd_cur_daemon_pages = 0; - tcd->tcd_max_pages = (max_pages * factor) / 100; - LASSERT(tcd->tcd_max_pages > 0); - tcd->tcd_shutting_down = 0; - } - - return 0; -} - -static void trace_cleanup_on_cpu(void *info) -{ - struct trace_cpu_data *tcd; - struct trace_page *tage; - struct trace_page *tmp; - int i; - - tcd_for_each_type_lock(tcd, i) { - tcd->tcd_shutting_down = 1; - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - tage_free(tage); - } - tcd->tcd_cur_pages = 0; - } -} - -static void trace_cleanup(void) -{ - struct page_collection pc; - - CFS_INIT_LIST_HEAD(&pc.pc_pages); - spin_lock_init(&pc.pc_lock); - - trace_call_on_all_cpus(trace_cleanup_on_cpu, &pc); - - tracefile_fini_arch(); -} - -void tracefile_exit(void) -{ - trace_stop_thread(); - trace_cleanup(); -} diff --git a/lnet/libcfs/tracefile.h b/lnet/libcfs/tracefile.h deleted file mode 100644 index 7d433924582849534a6e85442de5a358364e27ab..0000000000000000000000000000000000000000 --- a/lnet/libcfs/tracefile.h +++ /dev/null @@ -1,248 +0,0 @@ -#ifndef __LIBCFS_TRACEFILE_H__ -#define __LIBCFS_TRACEFILE_H__ - -#include <libcfs/libcfs.h> - -/* trace file lock routines */ - -#define TRACEFILE_NAME_SIZE 1024 -extern char tracefile[TRACEFILE_NAME_SIZE]; -extern long long tracefile_size; - -int tracefile_init_arch(void); -void tracefile_fini_arch(void); - -void tracefile_read_lock(void); -void tracefile_read_unlock(void); -void tracefile_write_lock(void); -void tracefile_write_unlock(void); - -int tracefile_dump_all_pages(char *filename); -void trace_debug_print(void); -void trace_flush_pages(void); -int trace_start_thread(void); -void trace_stop_thread(void); -int tracefile_init(int max_pages); -void 
tracefile_exit(void); - - - -int trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char *usr_buffer, int usr_buffer_nob); -int trace_copyout_string(char *usr_buffer, int usr_buffer_nob, - const char *knl_str, char *append); -int trace_allocate_string_buffer(char **str, int nob); -void trace_free_string_buffer(char *str, int nob); -int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob); -int trace_daemon_command(char *str); -int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob); -int trace_set_debug_mb(int mb); -int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob); -int trace_get_debug_mb(void); - -extern void libcfs_debug_dumplog_internal(void *arg); -extern void libcfs_register_panic_notifier(void); -extern void libcfs_unregister_panic_notifier(void); -extern int libcfs_panic_in_progress; -extern int trace_max_debug_mb(void); - -#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) -#define TRACEFILE_SIZE (500 << 20) - -#ifdef LUSTRE_TRACEFILE_PRIVATE - -/* - * Private declare for tracefile - */ -#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) - -#define TRACEFILE_SIZE (500 << 20) - -/* Size of a buffer for sprinting console messages if we can't get a page - * from system */ -#define TRACE_CONSOLE_BUFFER_SIZE 1024 - -union trace_data_union { - struct trace_cpu_data { - /* - * pages with trace records not yet processed by tracefiled. - */ - struct list_head tcd_pages; - /* number of pages on ->tcd_pages */ - unsigned long tcd_cur_pages; - - /* - * pages with trace records already processed by - * tracefiled. These pages are kept in memory, so that some - * portion of log can be written in the event of LBUG. This - * list is maintained in LRU order. - * - * Pages are moved to ->tcd_daemon_pages by tracefiled() - * (put_pages_on_daemon_list()). LRU pages from this list are - * discarded when list grows too large. 
- */ - struct list_head tcd_daemon_pages; - /* number of pages on ->tcd_daemon_pages */ - unsigned long tcd_cur_daemon_pages; - - /* - * Maximal number of pages allowed on ->tcd_pages and - * ->tcd_daemon_pages each. - * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current - * implementation. - */ - unsigned long tcd_max_pages; - - /* - * preallocated pages to write trace records into. Pages from - * ->tcd_stock_pages are moved to ->tcd_pages by - * portals_debug_msg(). - * - * This list is necessary, because on some platforms it's - * impossible to perform efficient atomic page allocation in a - * non-blockable context. - * - * Such platforms fill ->tcd_stock_pages "on occasion", when - * tracing code is entered in blockable context. - * - * trace_get_tage_try() tries to get a page from - * ->tcd_stock_pages first and resorts to atomic page - * allocation only if this queue is empty. ->tcd_stock_pages - * is replenished when tracing code is entered in blocking - * context (darwin-tracefile.c:trace_get_tcd()). We try to - * maintain TCD_STOCK_PAGES (40 by default) pages in this - * queue. Atomic allocation is only required if more than - * TCD_STOCK_PAGES pagesful are consumed by trace records all - * emitted in non-blocking contexts. Which is quite unlikely. - */ - struct list_head tcd_stock_pages; - /* number of pages on ->tcd_stock_pages */ - unsigned long tcd_cur_stock_pages; - - unsigned short tcd_shutting_down; - unsigned short tcd_cpu; - unsigned short tcd_type; - /* The factors to share debug memory. 
*/ - unsigned short tcd_pages_factor; - } tcd; - char __pad[L1_CACHE_ALIGN(sizeof(struct trace_cpu_data))]; -}; - -#define TCD_MAX_TYPES 8 -extern union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS]; - -#define tcd_for_each(tcd, i, j) \ - for (i = 0; trace_data[i] != NULL; i++) \ - for (j = 0, ((tcd) = &(*trace_data[i])[j].tcd); \ - j < num_possible_cpus(); j++, (tcd) = &(*trace_data[i])[j].tcd) - -#define tcd_for_each_type_lock(tcd, i) \ - for (i = 0; trace_data[i] && \ - (tcd = &(*trace_data[i])[smp_processor_id()].tcd) && \ - trace_lock_tcd(tcd); trace_unlock_tcd(tcd), i++) - -/* XXX nikita: this declaration is internal to tracefile.c and should probably - * be moved there */ -struct page_collection { - struct list_head pc_pages; - /* - * spin-lock protecting ->pc_pages. It is taken by smp_call_function() - * call-back functions. XXX nikita: Which is horrible: all processors - * receive NMI at the same time only to be serialized by this - * lock. Probably ->pc_pages should be replaced with an array of - * NR_CPUS elements accessed locklessly. - */ - spinlock_t pc_lock; - /* - * if this flag is set, collect_pages() will spill both - * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, - * only ->tcd_pages are spilled. - */ - int pc_want_daemon_pages; -}; - -/* XXX nikita: this declaration is internal to tracefile.c and should probably - * be moved there */ -struct tracefiled_ctl { - struct completion tctl_start; - struct completion tctl_stop; - cfs_waitq_t tctl_waitq; - pid_t tctl_pid; - atomic_t tctl_shutdown; -}; - -/* - * small data-structure for each page owned by tracefiled. 
- */ -/* XXX nikita: this declaration is internal to tracefile.c and should probably - * be moved there */ -struct trace_page { - /* - * page itself - */ - cfs_page_t *page; - /* - * linkage into one of the lists in trace_data_union or - * page_collection - */ - struct list_head linkage; - /* - * number of bytes used within this page - */ - unsigned int used; - /* - * cpu that owns this page - */ - unsigned short cpu; - /* - * type(context) of this page - */ - unsigned short type; -}; - -extern void set_ptldebug_header(struct ptldebug_header *header, - int subsys, int mask, const int line, - unsigned long stack); -extern void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn); - -extern struct trace_cpu_data *trace_get_tcd(void); -extern void trace_put_tcd(struct trace_cpu_data *tcd); -extern int trace_lock_tcd(struct trace_cpu_data *tcd); -extern void trace_unlock_tcd(struct trace_cpu_data *tcd); -extern char *trace_get_console_buffer(void); -extern void trace_put_console_buffer(char *buffer); - -extern void trace_call_on_all_cpus(void (*fn)(void *arg), void *arg); - -int trace_refill_stock(struct trace_cpu_data *tcd, int gfp, - struct list_head *stock); - - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage); - -extern void trace_assertion_failed(const char *str, const char *fn, - const char *file, int line); - -/* ASSERTION that is safe to use within the debug system */ -#define __LASSERT(cond) \ -({ \ - if (unlikely(!(cond))) { \ - trace_assertion_failed("ASSERTION("#cond") failed", \ - __FUNCTION__, __FILE__, __LINE__); \ - } \ -}) - -#define __LASSERT_TAGE_INVARIANT(tage) \ -({ \ - __LASSERT(tage != NULL); \ - __LASSERT(tage->page != NULL); \ - __LASSERT(tage->used <= CFS_PAGE_SIZE); \ - __LASSERT(cfs_page_count(tage->page) > 0); \ -}) - -#endif /* LUSTRE_TRACEFILE_PRIVATE */ - -#endif /* __LIBCFS_TRACEFILE_H__ */ diff --git a/lnet/libcfs/user-bitops.c 
b/lnet/libcfs/user-bitops.c deleted file mode 100644 index 8f9459335976481d18627b6166c96a7b74b28e47..0000000000000000000000000000000000000000 --- a/lnet/libcfs/user-bitops.c +++ /dev/null @@ -1,98 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2007 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. 
- * - */ -#ifndef __KERNEL__ - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include <libcfs/user-bitops.h> - -#define OFF_BY_START(start) ((start)/BITS_PER_LONG) - -unsigned long find_next_bit(unsigned long *addr, - unsigned long size, unsigned long offset) -{ - unsigned long *word, *last; - unsigned long first_bit, bit, base; - - word = addr + OFF_BY_START(offset); - last = addr + OFF_BY_START(size-1); - first_bit = offset % BITS_PER_LONG; - base = offset - first_bit; - - if (offset >= size) - return size; - if (first_bit != 0) { - int tmp = (*word++) & (~0UL << first_bit); - bit = __ffs(tmp); - if (bit < BITS_PER_LONG) - goto found; - word++; - base += BITS_PER_LONG; - } - while (word <= last) { - if (*word != 0UL) { - bit = __ffs(*word); - goto found; - } - word++; - base += BITS_PER_LONG; - } - return size; -found: - return base + bit; -} - -unsigned long find_next_zero_bit(unsigned long *addr, - unsigned long size, unsigned long offset) -{ - unsigned long *word, *last; - unsigned long first_bit, bit, base; - - word = addr + OFF_BY_START(offset); - last = addr + OFF_BY_START(size-1); - first_bit = offset % BITS_PER_LONG; - base = offset - first_bit; - - if (offset >= size) - return size; - if (first_bit != 0) { - int tmp = (*word++) & (~0UL << first_bit); - bit = __ffz(tmp); - if (bit < BITS_PER_LONG) - goto found; - word++; - base += BITS_PER_LONG; - } - while (word <= last) { - if (*word != ~0UL) { - bit = __ffz(*word); - goto found; - } - word++; - base += BITS_PER_LONG; - } - return size; -found: - return base + bit; -} - -#endif diff --git a/lnet/libcfs/user-lock.c b/lnet/libcfs/user-lock.c deleted file mode 100644 index c521dc7415025dbb1b39ae76dc144ecdd5273129..0000000000000000000000000000000000000000 --- a/lnet/libcfs/user-lock.c +++ /dev/null @@ -1,343 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. 
- * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. - * - */ - -/* Implementations of portable synchronization APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. - * - * XXX Liang: There are several branches share lnet with b_hd_newconfig, - * if we define lock APIs at here, there will be conflict with liblustre - * in other branches. - */ - -#ifndef __KERNEL__ - -#include <stdlib.h> -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Optional debugging (magic stamping and checking ownership) can be added. - */ - -#if 0 -/* - * spin_lock - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - * - * No-op implementation. 
- */ - -void spin_lock_init(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -void spin_lock(spinlock_t *lock) -{ - (void)lock; -} - -void spin_unlock(spinlock_t *lock) -{ - (void)lock; -} - -int spin_trylock(spinlock_t *lock) -{ - (void)lock; - return 1; -} - -void spin_lock_bh_init(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -void spin_lock_bh(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -void spin_unlock_bh(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ -struct semaphore {}; - -void sema_init(struct semaphore *s, int val) -{ - LASSERT(s != NULL); - (void)s; - (void)val; -} - -void __down(struct semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -void __up(struct semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -/* - * Mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ - -#define mutex_up(s) __up(s) -#define mutex_down(s) __down(s) - -#define init_mutex(x) sema_init(x, 1) -#define init_mutex_locked(x) sema_init(x, 0) - -/* - * Completion: - * - * - init_completion(c) - * - complete(c) - * - wait_for_completion(c) - */ -struct completion {}; - -void init_completion(struct completion *c) -{ - LASSERT(c != NULL); - (void)c; -} - -void complete(struct completion *c) -{ - LASSERT(c != NULL); - (void)c; -} - -void wait_for_completion(struct completion *c) -{ - LASSERT(c != NULL); - (void)c; -} - -/* - * rw_semaphore: - * - * - DECLARE_RWSEM(x) - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ -struct rw_semaphore {}; - -void init_rwsem(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -void down_read(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -int down_read_trylock(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; - return 1; -} - -void down_write(struct 
rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -int down_write_trylock(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; - return 1; -} - -void up_read(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -void up_write(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} -#endif - -#ifdef HAVE_LIBPTHREAD - -/* - * Completion - */ - -void cfs_init_completion(struct cfs_completion *c) -{ - LASSERT(c != NULL); - c->c_done = 0; - pthread_mutex_init(&c->c_mut, NULL); - pthread_cond_init(&c->c_cond, NULL); -} - -void cfs_fini_completion(struct cfs_completion *c) -{ - LASSERT(c != NULL); - pthread_mutex_destroy(&c->c_mut); - pthread_cond_destroy(&c->c_cond); -} - -void cfs_complete(struct cfs_completion *c) -{ - LASSERT(c != NULL); - pthread_mutex_lock(&c->c_mut); - c->c_done++; - pthread_cond_signal(&c->c_cond); - pthread_mutex_unlock(&c->c_mut); -} - -void cfs_wait_for_completion(struct cfs_completion *c) -{ - LASSERT(c != NULL); - pthread_mutex_lock(&c->c_mut); - while (c->c_done == 0) - pthread_cond_wait(&c->c_cond, &c->c_mut); - c->c_done--; - pthread_mutex_unlock(&c->c_mut); -} - -/* - * atomic primitives - */ - -static pthread_mutex_t atomic_guard_lock = PTHREAD_MUTEX_INITIALIZER; - -int cfs_atomic_read(cfs_atomic_t *a) -{ - int r; - - pthread_mutex_lock(&atomic_guard_lock); - r = a->counter; - pthread_mutex_unlock(&atomic_guard_lock); - return r; -} - -void cfs_atomic_set(cfs_atomic_t *a, int b) -{ - pthread_mutex_lock(&atomic_guard_lock); - a->counter = b; - pthread_mutex_unlock(&atomic_guard_lock); -} - -int cfs_atomic_dec_and_test(cfs_atomic_t *a) -{ - int r; - - pthread_mutex_lock(&atomic_guard_lock); - r = --a->counter; - pthread_mutex_unlock(&atomic_guard_lock); - return (r == 0); -} - -void cfs_atomic_inc(cfs_atomic_t *a) -{ - pthread_mutex_lock(&atomic_guard_lock); - ++a->counter; - pthread_mutex_unlock(&atomic_guard_lock); -} - -void cfs_atomic_dec(cfs_atomic_t *a) -{ - pthread_mutex_lock(&atomic_guard_lock); - 
--a->counter; - pthread_mutex_unlock(&atomic_guard_lock); -} -void cfs_atomic_add(int b, cfs_atomic_t *a) - -{ - pthread_mutex_lock(&atomic_guard_lock); - a->counter += b; - pthread_mutex_unlock(&atomic_guard_lock); -} - -void cfs_atomic_sub(int b, cfs_atomic_t *a) -{ - pthread_mutex_lock(&atomic_guard_lock); - a->counter -= b; - pthread_mutex_unlock(&atomic_guard_lock); -} - -#endif /* HAVE_LIBPTHREAD */ - - -/* !__KERNEL__ */ -#endif - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/user-prim.c b/lnet/libcfs/user-prim.c deleted file mode 100644 index ffa32c193d501a76dd8430ef4e57f10d152ead0a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/user-prim.c +++ /dev/null @@ -1,399 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable APIs for user-level. - * - */ - -/* Implementations of portable APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. 
- */ - -#ifndef __KERNEL__ - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#include <sys/mman.h> -#ifndef __CYGWIN__ -#include <stdint.h> -#ifdef HAVE_ASM_PAGE_H -#include <asm/page.h> -#endif -#ifdef HAVE_SYS_USER_H -#include <sys/user.h> -#endif -#else -#include <sys/types.h> -#endif -#include <stdlib.h> -#include <string.h> -#include <signal.h> -#include <errno.h> -#include <sys/stat.h> -#ifdef HAVE_SYS_VFS_H -#include <sys/vfs.h> -#endif - -/* - * Sleep channel. No-op implementation. - */ - -void cfs_waitq_init(struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitlink_init(struct cfs_waitlink *link) -{ - LASSERT(link != NULL); - (void)link; -} - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -int cfs_waitq_active(struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - (void)waitq; - return 0; -} - -void cfs_waitq_signal(struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitq_broadcast(struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitq_wait(struct cfs_waitlink *link, int state) -{ - LASSERT(link != NULL); - (void)link; -} - -int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout) -{ - LASSERT(link != NULL); - 
(void)link; - return 0; -} - -#ifdef HAVE_LIBPTHREAD - -/* - * Threads - */ - -struct lustre_thread_arg { - cfs_thread_t f; - void *arg; -}; -static void *cfs_thread_helper(void *data) -{ - struct lustre_thread_arg *targ = data; - cfs_thread_t f = targ->f; - void *arg = targ->arg; - - free(targ); - - (void)f(arg); - return NULL; -} -int cfs_create_thread(cfs_thread_t func, void *arg) -{ - pthread_t tid; - pthread_attr_t tattr; - int rc; - struct lustre_thread_arg *targ_p = malloc(sizeof(struct lustre_thread_arg)); - - if ( targ_p == NULL ) - return -ENOMEM; - - targ_p->f = func; - targ_p->arg = arg; - - pthread_attr_init(&tattr); - pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED); - rc = pthread_create(&tid, &tattr, cfs_thread_helper, targ_p); - pthread_attr_destroy(&tattr); - return -rc; -} -#endif - -uid_t cfs_curproc_uid(void) -{ - return getuid(); -} - -int cfs_parse_int_tunable(int *value, char *name) -{ - char *env = getenv(name); - char *end; - - if (env == NULL) - return 0; - - *value = strtoull(env, &end, 0); - if (*end == 0) - return 0; - - CERROR("Can't parse tunable %s=%s\n", name, env); - return -EINVAL; -} - -/* - * Allocator - */ - -cfs_page_t *cfs_alloc_page(unsigned int flags) -{ - cfs_page_t *pg = malloc(sizeof(*pg)); - - if (!pg) - return NULL; - pg->addr = malloc(CFS_PAGE_SIZE); - - if (!pg->addr) { - free(pg); - return NULL; - } - return pg; -} - -void cfs_free_page(cfs_page_t *pg) -{ - free(pg->addr); - free(pg); -} - -void *cfs_page_address(cfs_page_t *pg) -{ - return pg->addr; -} - -void *cfs_kmap(cfs_page_t *pg) -{ - return pg->addr; -} - -void cfs_kunmap(cfs_page_t *pg) -{ -} - -/* - * SLAB allocator - */ - -cfs_mem_cache_t * -cfs_mem_cache_create(const char *name, size_t objsize, size_t off, unsigned long flags) -{ - cfs_mem_cache_t *c; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->size = objsize; - CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n", - name, c, (int)objsize); - return c; -} - -int 
cfs_mem_cache_destroy(cfs_mem_cache_t *c) -{ - CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", c, c->size); - free(c); - return 0; -} - -void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp) -{ - return cfs_alloc(c->size, gfp); -} - -void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr) -{ - cfs_free(addr); -} - -void cfs_enter_debugger(void) -{ - /* - * nothing for now. - */ -} - -void cfs_daemonize(char *str) -{ - return; -} - -int cfs_daemonize_ctxt(char *str) -{ - return 0; -} - -cfs_sigset_t cfs_block_allsigs(void) -{ - cfs_sigset_t all; - cfs_sigset_t old; - int rc; - - sigfillset(&all); - rc = sigprocmask(SIG_SETMASK, &all, &old); - LASSERT(rc == 0); - - return old; -} - -cfs_sigset_t cfs_block_sigs(cfs_sigset_t blocks) -{ - cfs_sigset_t old; - int rc; - - rc = sigprocmask(SIG_SETMASK, &blocks, &old); - LASSERT (rc == 0); - - return old; -} - -void cfs_restore_sigs(cfs_sigset_t old) -{ - int rc = sigprocmask(SIG_SETMASK, &old, NULL); - - LASSERT (rc == 0); -} - -int cfs_signal_pending(void) -{ - cfs_sigset_t empty; - cfs_sigset_t set; - int rc; - - rc = sigpending(&set); - LASSERT (rc == 0); - - sigemptyset(&empty); - - return !memcmp(&empty, &set, sizeof(set)); -} - -void cfs_clear_sigpending(void) -{ - return; -} - -#ifdef __linux__ - -/* - * In glibc (NOT in Linux, so check above is not right), implement - * stack-back-tracing through backtrace() function. 
- */ -#include <execinfo.h> - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{ - backtrace(trace->frame, sizeof_array(trace->frame)); -} - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - if (0 <= frame_no && frame_no < sizeof_array(trace->frame)) - return trace->frame[frame_no]; - else - return NULL; -} - -#else - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{} -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - return NULL; -} - -/* __linux__ */ -#endif - -void lbug_with_loc(char *file, const char *func, const int line) -{ - /* No libcfs_catastrophe in userspace! */ - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n"); - abort(); -} - -/* !__KERNEL__ */ -#endif - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/user-tcpip.c b/lnet/libcfs/user-tcpip.c deleted file mode 100644 index e0cedb9921909b0c7b1cefc563e7f3a0632ab1f9..0000000000000000000000000000000000000000 --- a/lnet/libcfs/user-tcpip.c +++ /dev/null @@ -1,606 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#if !defined(__KERNEL__) || !defined(REDSTORM) - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#include <sys/socket.h> -#ifdef HAVE_NETINET_IN_H -#include <netinet/in.h> -#endif -#include <netinet/tcp.h> -#include <sys/ioctl.h> -#include <unistd.h> -#include <string.h> -#include <unistd.h> -#include <poll.h> -#include <net/if.h> -#include <arpa/inet.h> -#include <errno.h> -#if defined(__sun__) || defined(__sun) -#include <sys/sockio.h> -#endif -#ifndef __CYGWIN__ -#include <sys/syscall.h> -#endif - -/* - * Functions to get network interfaces info - */ - -int -libcfs_sock_ioctl(int cmd, unsigned long arg) -{ - int fd, rc; - - fd = socket(AF_INET, SOCK_STREAM, 0); - - if (fd < 0) { - rc = -errno; - CERROR("socket() failed: errno==%d\n", errno); - return rc; - } - - rc = ioctl(fd, cmd, arg); - - close(fd); - return rc; -} - -int -libcfs_ipif_query (char *name, int *up, __u32 *ip) -{ - struct ifreq ifr; - int nob; - int rc; - __u32 val; - - nob = strlen(name); - if (nob >= IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - return -EINVAL; - } - - CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); - - strcpy(ifr.ifr_name, name); - rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - return rc; - } - - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = 0; - return 0; - } - - *up = 1; - - strcpy(ifr.ifr_name, name); - ifr.ifr_addr.sa_family = AF_INET; - rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get IP address for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - return 0; -} - -void 
-libcfs_ipif_free_enumeration (char **names, int n) -{ - int i; - - LASSERT (n > 0); - - for (i = 0; i < n && names[i] != NULL; i++) - LIBCFS_FREE(names[i], IFNAMSIZ); - - LIBCFS_FREE(names, n * sizeof(*names)); -} - -int -libcfs_ipif_enumerate (char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int nalloc; - int nfound; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - - - nalloc = 16; /* first guess at max interfaces */ - for (;;) { - LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { - CERROR ("ENOMEM enumerating up to %d interfaces\n", - nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - - rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); - - if (rc < 0) { - CERROR ("Error %d enumerating interfaces\n", rc); - goto out1; - } - - LASSERT (rc == 0); - - nfound = ifc.ifc_len/sizeof(*ifr); - LASSERT (nfound <= nalloc); - - if (nfound < nalloc) - break; - - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - nalloc *= 2; - } - - if (nfound == 0) - goto out1; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out1; - } - /* NULL out all names[i] */ - memset (names, 0, nfound * sizeof(*names)); - - for (i = 0; i < nfound; i++) { - - nob = strlen (ifr[i].ifr_name); - if (nob >= IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - - out2: - if (rc < 0) - libcfs_ipif_free_enumeration(names, nfound); - out1: - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - out0: - return rc; -} - -/* - * Network functions used by user-land lnet acceptor - */ - -int 
-libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog) -{ - int rc; - int option; - struct sockaddr_in locaddr; - - *sockp = socket(AF_INET, SOCK_STREAM, 0); - if (*sockp < 0) { - rc = -errno; - CERROR("socket() failed: errno==%d\n", errno); - return rc; - } - - option = 1; - if ( setsockopt(*sockp, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof (option)) ) { - rc = -errno; - CERROR("setsockopt(SO_REUSEADDR) failed: errno==%d\n", errno); - goto failed; - } - - if (local_ip != 0 || local_port != 0) { - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = (local_ip == 0) ? - INADDR_ANY : htonl(local_ip); - - if ( bind(*sockp, (struct sockaddr *)&locaddr, sizeof(locaddr)) ) { - rc = -errno; - if ( errno == -EADDRINUSE ) - CDEBUG(D_NET, "Port %d already in use\n", - local_port); - else - CERROR("bind() to port %d failed: errno==%d\n", - local_port, errno); - goto failed; - } - } - - if ( listen(*sockp, backlog) ) { - rc = -errno; - CERROR("listen() with backlog==%d failed: errno==%d\n", - backlog, errno); - goto failed; - } - - return 0; - - failed: - close(*sockp); - return rc; -} - -int -libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port) -{ - struct sockaddr_in accaddr; - socklen_t accaddr_len = sizeof(struct sockaddr_in); - - *newsockp = accept(sock, (struct sockaddr *)&accaddr, &accaddr_len); - - if ( *newsockp < 0 ) { - CERROR("accept() failed: errno==%d\n", errno); - return -errno; - } - - *peer_ip = ntohl(accaddr.sin_addr.s_addr); - *peer_port = ntohs(accaddr.sin_port); - - return 0; -} - -int -libcfs_sock_read (int sock, void *buffer, int nob, int timeout) -{ - int rc; - struct pollfd pfd; - cfs_time_t start_time = cfs_time_current(); - - pfd.fd = sock; - pfd.events = POLLIN; - pfd.revents = 0; - - /* poll(2) measures timeout in msec */ - timeout *= 1000; - - while (nob != 0 && timeout > 0) { - cfs_time_t current_time; - - 
rc = poll(&pfd, 1, timeout); - if (rc < 0) - return -errno; - if (rc == 0) - return -ETIMEDOUT; - if ((pfd.revents & POLLIN) == 0) - return -EIO; - - rc = read(sock, buffer, nob); - if (rc < 0) - return -errno; - if (rc == 0) - return -EIO; - - buffer = ((char *)buffer) + rc; - nob -= rc; - - current_time = cfs_time_current(); - timeout -= cfs_duration_sec(cfs_time_sub(cfs_time_current(), - start_time)); - } - - if (nob == 0) - return 0; - else - return -ETIMEDOUT; -} - -/* Just try to connect to localhost to wake up entity that are - * sleeping in accept() */ -void -libcfs_sock_abort_accept(__u16 port) -{ - int fd, rc; - struct sockaddr_in locaddr; - - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(port); - locaddr.sin_addr.s_addr = inet_addr("127.0.0.1"); - - fd = socket(AF_INET, SOCK_STREAM, 0); - if ( fd < 0 ) { - CERROR("socket() failed: errno==%d\n", errno); - return; - } - - rc = connect(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if ( rc != 0 ) { - if ( errno != ECONNREFUSED ) - CERROR("connect() failed: errno==%d\n", errno); - else - CDEBUG(D_NET, "Nobody to wake up at %d\n", port); - } - - close(fd); -} - -/* - * Network functions of common use - */ - -int -libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p) -{ - int rc; - struct sockaddr_in peer_addr; - socklen_t peer_addr_len = sizeof(peer_addr); - - rc = getpeername(sock_fd, (struct sockaddr *)&peer_addr, &peer_addr_len); - if (rc != 0) - return -errno; - - if (ipaddr_p != NULL) - *ipaddr_p = ntohl(peer_addr.sin_addr.s_addr); - if (port_p != NULL) - *port_p = ntohs(peer_addr.sin_port); - - return 0; -} - -int -libcfs_socketpair(int *fdp) -{ - int rc, i; - - rc = socketpair(AF_UNIX, SOCK_STREAM, 0, fdp); - if (rc != 0) { - rc = -errno; - CERROR ("Cannot create socket pair\n"); - return rc; - } - - for (i = 0; i < 2; i++) { - rc = libcfs_fcntl_nonblock(fdp[i]); - if (rc) { - close(fdp[0]); - close(fdp[1]); - return rc; - } - } - - 
return 0; -} - -int -libcfs_fcntl_nonblock(int fd) -{ - int rc, flags; - - flags = fcntl(fd, F_GETFL, 0); - if (flags == -1) { - rc = -errno; - CERROR ("Cannot get socket flags\n"); - return rc; - } - - rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); - if (rc != 0) { - rc = -errno; - CERROR ("Cannot set socket flags\n"); - return rc; - } - - return 0; -} - -int -libcfs_sock_set_nagle(int fd, int nagle) -{ - int rc; - int option = nagle ? 0 : 1; - -#if defined(__sun__) || defined(__sun) - rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option)); -#else - rc = setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); -#endif - - if (rc != 0) { - rc = -errno; - CERROR ("Cannot set NODELAY socket option\n"); - return rc; - } - - return 0; -} - -int -libcfs_sock_set_bufsiz(int fd, int bufsiz) -{ - int rc, option; - - LASSERT (bufsiz != 0); - - option = bufsiz; - rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - if (rc != 0) { - rc = -errno; - CERROR ("Cannot set SNDBUF socket option\n"); - return rc; - } - - option = bufsiz; - rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); - if (rc != 0) { - rc = -errno; - CERROR ("Cannot set RCVBUF socket option\n"); - return rc; - } - - return 0; -} - -int -libcfs_sock_create(int *fdp) -{ - int rc, fd, option; - - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - rc = -errno; - CERROR ("Cannot create socket\n"); - return rc; - } - - option = 1; - rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &option, sizeof(option)); - if (rc != 0) { - rc = -errno; - CERROR ("Cannot set SO_REUSEADDR for socket\n"); - close(fd); - return rc; - } - - *fdp = fd; - return 0; -} - -int -libcfs_sock_bind_to_port(int fd, __u16 port) -{ - int rc; - struct sockaddr_in locaddr; - - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_addr.s_addr = INADDR_ANY; - locaddr.sin_port = htons(port); - - rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc 
!= 0) { - rc = -errno; - CERROR ("Cannot bind to port %d\n", port); - return rc; - } - - return 0; -} - -int -libcfs_sock_connect(int fd, __u32 ip, __u16 port) -{ - int rc; - struct sockaddr_in addr; - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); - - rc = connect(fd, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in)); - - if(rc != 0 && errno != EINPROGRESS) { - rc = -errno; - if (rc != -EADDRINUSE && rc != -EADDRNOTAVAIL) - CERROR ("Cannot connect to %u.%u.%u.%u:%d (err=%d)\n", - HIPQUAD(ip), port, errno); - return rc; - } - - return 0; -} - -/* NB: EPIPE and ECONNRESET are considered as non-fatal - * because: - * 1) it still makes sense to continue reading && - * 2) anyway, poll() will set up POLLHUP|POLLERR flags */ -int libcfs_sock_writev(int fd, const struct iovec *vector, int count) -{ - int rc; - - rc = syscall(SYS_writev, fd, vector, count); - - if (rc == 0) /* write nothing */ - return 0; - - if (rc < 0) { - if (errno == EAGAIN || /* write nothing */ - errno == EPIPE || /* non-fatal error */ - errno == ECONNRESET) /* non-fatal error */ - return 0; - else - return -errno; - } - - return rc; -} - -int libcfs_sock_readv(int fd, const struct iovec *vector, int count) -{ - int rc; - - rc = syscall(SYS_readv, fd, vector, count); - - if (rc == 0) /* EOF */ - return -EIO; - - if (rc < 0) { - if (errno == EAGAIN) /* read nothing */ - return 0; - else - return -errno; - } - - return rc; -} - -#endif /* !__KERNEL__ || !defined(REDSTORM) */ diff --git a/lnet/libcfs/watchdog.c b/lnet/libcfs/watchdog.c deleted file mode 100644 index 89d757c2afb7ae4c1c7135fe002d72ace9eb1bb8..0000000000000000000000000000000000000000 --- a/lnet/libcfs/watchdog.c +++ /dev/null @@ -1,427 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. 
- * Author: Jacob Berkman <jacob@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -struct lc_watchdog { - cfs_timer_t lcw_timer; /* kernel timer */ - struct list_head lcw_list; - struct timeval lcw_last_touched; - cfs_task_t *lcw_task; - - void (*lcw_callback)(pid_t, void *); - void *lcw_data; - - pid_t lcw_pid; - cfs_duration_t lcw_time; /* time until watchdog fires, jiffies */ - - enum { - LC_WATCHDOG_DISABLED, - LC_WATCHDOG_ENABLED, - LC_WATCHDOG_EXPIRED - } lcw_state; -}; - -#ifdef WITH_WATCHDOG -/* - * The dispatcher will complete lcw_start_completion when it starts, - * and lcw_stop_completion when it exits. - * Wake lcw_event_waitq to signal timer callback dispatches. - */ -static struct completion lcw_start_completion; -static struct completion lcw_stop_completion; -static wait_queue_head_t lcw_event_waitq; - -/* - * Set this and wake lcw_event_waitq to stop the dispatcher. - */ -enum { - LCW_FLAG_STOP = 0 -}; -static unsigned long lcw_flags = 0; - -/* - * Number of outstanding watchdogs. - * When it hits 1, we start the dispatcher. - * When it hits 0, we stop the distpatcher. 
- */ -static __u32 lcw_refcount = 0; -static DECLARE_MUTEX(lcw_refcount_sem); - -/* - * List of timers that have fired that need their callbacks run by the - * dispatcher. - */ -static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; /* BH lock! */ -static struct list_head lcw_pending_timers = \ - LIST_HEAD_INIT(lcw_pending_timers); - -#ifdef HAVE_TASKLIST_LOCK -static void -lcw_dump(struct lc_watchdog *lcw) -{ - cfs_task_t *tsk; - ENTRY; - - read_lock(&tasklist_lock); - tsk = find_task_by_pid(lcw->lcw_pid); - - if (tsk == NULL) { - CWARN("Process %d was not found in the task list; " - "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); - } else if (tsk != lcw->lcw_task) { - CWARN("The current process %d did not set the watchdog; " - "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); - } else { - libcfs_debug_dumpstack(tsk); - } - - read_unlock(&tasklist_lock); - EXIT; -} -#else -static void -lcw_dump(struct lc_watchdog *lcw) -{ - CERROR("unable to dump stack because of missing export\n"); -} -#endif - -static void lcw_cb(unsigned long data) -{ - struct lc_watchdog *lcw = (struct lc_watchdog *)data; - - ENTRY; - - if (lcw->lcw_state != LC_WATCHDOG_ENABLED) { - EXIT; - return; - } - - lcw->lcw_state = LC_WATCHDOG_EXPIRED; - - /* NB this warning should appear on the console, but may not get into - * the logs since we're running in a softirq handler */ - - CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n", - (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); - lcw_dump(lcw); - - spin_lock_bh(&lcw_pending_timers_lock); - - if (list_empty(&lcw->lcw_list)) { - list_add(&lcw->lcw_list, &lcw_pending_timers); - wake_up(&lcw_event_waitq); - } - - spin_unlock_bh(&lcw_pending_timers_lock); - - EXIT; -} - -static int is_watchdog_fired(void) -{ - int rc; - - if (test_bit(LCW_FLAG_STOP, &lcw_flags)) - return 1; - - spin_lock_bh(&lcw_pending_timers_lock); - rc = !list_empty(&lcw_pending_timers); - 
spin_unlock_bh(&lcw_pending_timers_lock); - return rc; -} - -static int lcw_dispatch_main(void *data) -{ - int rc = 0; - unsigned long flags; - struct lc_watchdog *lcw; - - ENTRY; - - cfs_daemonize("lc_watchdogd"); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - complete(&lcw_start_completion); - - while (1) { - wait_event_interruptible(lcw_event_waitq, is_watchdog_fired()); - CDEBUG(D_INFO, "Watchdog got woken up...\n"); - if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { - CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n"); - - spin_lock_bh(&lcw_pending_timers_lock); - rc = !list_empty(&lcw_pending_timers); - spin_unlock_bh(&lcw_pending_timers_lock); - if (rc) { - CERROR("pending timers list was not empty at " - "time of watchdog dispatch shutdown\n"); - } - break; - } - - spin_lock_bh(&lcw_pending_timers_lock); - while (!list_empty(&lcw_pending_timers)) { - - lcw = list_entry(lcw_pending_timers.next, - struct lc_watchdog, - lcw_list); - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - CDEBUG(D_INFO, "found lcw for pid %d: inactive for " - "%lds\n", (int)lcw->lcw_pid, - cfs_duration_sec(lcw->lcw_time)); - - if (lcw->lcw_state != LC_WATCHDOG_DISABLED) - lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data); - - spin_lock_bh(&lcw_pending_timers_lock); - } - spin_unlock_bh(&lcw_pending_timers_lock); - } - - complete(&lcw_stop_completion); - - RETURN(rc); -} - -static void lcw_dispatch_start(void) -{ - int rc; - - ENTRY; - LASSERT(lcw_refcount == 1); - - init_completion(&lcw_stop_completion); - init_completion(&lcw_start_completion); - init_waitqueue_head(&lcw_event_waitq); - - CDEBUG(D_INFO, "starting dispatch thread\n"); - rc = kernel_thread(lcw_dispatch_main, NULL, 0); - if (rc < 0) { - CERROR("error spawning watchdog dispatch thread: %d\n", rc); - EXIT; - return; - } - wait_for_completion(&lcw_start_completion); - CDEBUG(D_INFO, "watchdog dispatcher 
initialization complete.\n"); - - EXIT; -} - -static void lcw_dispatch_stop(void) -{ - ENTRY; - LASSERT(lcw_refcount == 0); - - CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); - - set_bit(LCW_FLAG_STOP, &lcw_flags); - wake_up(&lcw_event_waitq); - - wait_for_completion(&lcw_stop_completion); - - CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); - - EXIT; -} - -struct lc_watchdog *lc_watchdog_add(int timeout_ms, - void (*callback)(pid_t, void *), - void *data) -{ - struct lc_watchdog *lcw = NULL; - ENTRY; - - LIBCFS_ALLOC(lcw, sizeof(*lcw)); - if (lcw == NULL) { - CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n"); - RETURN(ERR_PTR(-ENOMEM)); - } - - lcw->lcw_task = cfs_current(); - lcw->lcw_pid = cfs_curproc_pid(); - lcw->lcw_time = cfs_time_seconds(timeout_ms) / 1000; - lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog; - lcw->lcw_data = data; - lcw->lcw_state = LC_WATCHDOG_DISABLED; - - INIT_LIST_HEAD(&lcw->lcw_list); - - lcw->lcw_timer.function = lcw_cb; - lcw->lcw_timer.data = (unsigned long)lcw; - lcw->lcw_timer.expires = jiffies + lcw->lcw_time; - init_timer(&lcw->lcw_timer); - - down(&lcw_refcount_sem); - if (++lcw_refcount == 1) - lcw_dispatch_start(); - up(&lcw_refcount_sem); - - /* Keep this working in case we enable them by default */ - if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { - do_gettimeofday(&lcw->lcw_last_touched); - add_timer(&lcw->lcw_timer); - } - - RETURN(lcw); -} -EXPORT_SYMBOL(lc_watchdog_add); - -static void lcw_update_time(struct lc_watchdog *lcw, const char *message) -{ - struct timeval newtime; - struct timeval timediff; - - do_gettimeofday(&newtime); - if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { - cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff); - CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n", - lcw->lcw_pid, - message, - timediff.tv_sec, - timediff.tv_usec / 100); - } - lcw->lcw_last_touched = newtime; -} - -void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int 
timeout_ms) -{ - ENTRY; - LASSERT(lcw != NULL); - - spin_lock_bh(&lcw_pending_timers_lock); - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - lcw_update_time(lcw, "touched"); - lcw->lcw_state = LC_WATCHDOG_ENABLED; - - mod_timer(&lcw->lcw_timer, jiffies + - cfs_time_seconds(timeout_ms) / 1000); - - EXIT; -} -EXPORT_SYMBOL(lc_watchdog_touch_ms); - -/* deprecated - use above instead */ -void lc_watchdog_touch(struct lc_watchdog *lcw) -{ - lc_watchdog_touch_ms(lcw, cfs_duration_sec(lcw->lcw_time) * 1000); -} -EXPORT_SYMBOL(lc_watchdog_touch); - -void lc_watchdog_disable(struct lc_watchdog *lcw) -{ - ENTRY; - LASSERT(lcw != NULL); - - spin_lock_bh(&lcw_pending_timers_lock); - if (!list_empty(&lcw->lcw_list)) - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - lcw_update_time(lcw, "disabled"); - lcw->lcw_state = LC_WATCHDOG_DISABLED; - - EXIT; -} -EXPORT_SYMBOL(lc_watchdog_disable); - -void lc_watchdog_delete(struct lc_watchdog *lcw) -{ - ENTRY; - LASSERT(lcw != NULL); - - del_timer(&lcw->lcw_timer); - - lcw_update_time(lcw, "deleted"); - - spin_lock_bh(&lcw_pending_timers_lock); - if (!list_empty(&lcw->lcw_list)) - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - down(&lcw_refcount_sem); - if (--lcw_refcount == 0) - lcw_dispatch_stop(); - up(&lcw_refcount_sem); - - LIBCFS_FREE(lcw, sizeof(*lcw)); - - EXIT; -} -EXPORT_SYMBOL(lc_watchdog_delete); - -/* - * Provided watchdog handlers - */ - -void lc_watchdog_dumplog(pid_t pid, void *data) -{ - libcfs_debug_dumplog_internal((void *)((unsigned long)pid)); -} -EXPORT_SYMBOL(lc_watchdog_dumplog); - -#else /* !defined(WITH_WATCHDOG) */ - -struct lc_watchdog *lc_watchdog_add(int timeout_ms, - void (*callback)(pid_t pid, void *), - void *data) -{ - static struct lc_watchdog watchdog; - return &watchdog; -} -EXPORT_SYMBOL(lc_watchdog_add); - -void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms) -{ -} 
-EXPORT_SYMBOL(lc_watchdog_touch_ms); - -void lc_watchdog_touch(struct lc_watchdog *lcw) -{ -} -EXPORT_SYMBOL(lc_watchdog_touch); - -void lc_watchdog_disable(struct lc_watchdog *lcw) -{ -} -EXPORT_SYMBOL(lc_watchdog_disable); - -void lc_watchdog_delete(struct lc_watchdog *lcw) -{ -} -EXPORT_SYMBOL(lc_watchdog_delete); - -#endif - diff --git a/lnet/libcfs/winnt/winnt-curproc.c b/lnet/libcfs/winnt/winnt-curproc.c deleted file mode 100644 index e21c5c9d51d573050fef5cc9b5d44bffc6004cb1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-curproc.c +++ /dev/null @@ -1,453 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - * - * Impletion of winnt curproc routines. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - - -/* - * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) - * for Linux kernel. 
- */ - -cfs_task_t this_task = - { 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, - "sysetm\0" }; - - -uid_t cfs_curproc_uid(void) -{ - return this_task.uid; -} - -gid_t cfs_curproc_gid(void) -{ - return this_task.gid; -} - -uid_t cfs_curproc_fsuid(void) -{ - return this_task.fsuid; -} - -gid_t cfs_curproc_fsgid(void) -{ - return this_task.fsgid; -} - -pid_t cfs_curproc_pid(void) -{ - return cfs_current()->pid; -} - -int cfs_curproc_groups_nr(void) -{ - return this_task.ngroups; -} - -void cfs_curproc_groups_dump(gid_t *array, int size) -{ - LASSERT(size <= NGROUPS); - size = min_t(int, size, this_task.ngroups); - memcpy(array, this_task.groups, size * sizeof(__u32)); -} - -int cfs_curproc_is_in_groups(gid_t gid) -{ - return in_group_p(gid); -} - -mode_t cfs_curproc_umask(void) -{ - return this_task.umask; -} - -char *cfs_curproc_comm(void) -{ - return this_task.comm; -} - -cfs_kernel_cap_t cfs_curproc_cap_get(void) -{ - return this_task.cap_effective; -} - -void cfs_curproc_cap_set(cfs_kernel_cap_t cap) -{ - this_task.cap_effective = cap; -} - - -/* - * Implementation of linux task management routines - */ - - -/* global of the task manager structure */ - -TASK_MAN TaskMan; - - -/* - * task slot routiens - */ - -PTASK_SLOT -alloc_task_slot() -{ - PTASK_SLOT task = NULL; - - if (TaskMan.slab) { - task = cfs_mem_cache_alloc(TaskMan.slab, 0); - } else { - task = cfs_alloc(sizeof(TASK_SLOT), 0); - } - - return task; -} - -void -init_task_slot(PTASK_SLOT task) -{ - memset(task, 0, sizeof(TASK_SLOT)); - task->Magic = TASKSLT_MAGIC; - task->task = this_task; - task->task.pid = (pid_t)PsGetCurrentThreadId(); - cfs_init_event(&task->Event, TRUE, FALSE); -} - - -void -cleanup_task_slot(PTASK_SLOT task) -{ - if (TaskMan.slab) { - cfs_mem_cache_free(TaskMan.slab, task); - } else { - cfs_free(task); - } -} - -/* - * task manager related routines - */ - -VOID -task_manager_notify( - IN HANDLE ProcessId, - IN HANDLE ThreadId, - IN BOOLEAN Create - ) -{ - PLIST_ENTRY ListEntry 
= NULL; - PTASK_SLOT TaskSlot = NULL; - - spin_lock(&(TaskMan.Lock)); - - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { - - TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - - if (TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) { - - if (Create) { -/* - DbgPrint("task_manager_notify: Pid=%xh Tid %xh resued (TaskSlot->Tet = %xh)...\n", - ProcessId, ThreadId, TaskSlot->Tet); -*/ - } else { - /* remove the taskslot */ - RemoveEntryList(&(TaskSlot->Link)); - TaskMan.NumOfTasks--; - - /* now free the task slot */ - cleanup_task_slot(TaskSlot); - } - } - - ListEntry = ListEntry->Flink; - } - - spin_unlock(&(TaskMan.Lock)); -} - -int -init_task_manager() -{ - NTSTATUS status; - - /* initialize the content and magic */ - memset(&TaskMan, 0, sizeof(TASK_MAN)); - TaskMan.Magic = TASKMAN_MAGIC; - - /* initialize the spinlock protection */ - spin_lock_init(&TaskMan.Lock); - - /* create slab memory cache */ - TaskMan.slab = cfs_mem_cache_create( - "TSLT", sizeof(TASK_SLOT), 0, 0); - - /* intialize the list header */ - InitializeListHead(&(TaskMan.TaskList)); - - /* set the thread creation/destruction notify routine */ - status = PsSetCreateThreadNotifyRoutine(task_manager_notify); - - if (!NT_SUCCESS(status)) { - cfs_enter_debugger(); - } - - return 0; -} - -void -cleanup_task_manager() -{ - PLIST_ENTRY ListEntry = NULL; - PTASK_SLOT TaskSlot = NULL; - - /* we must stay in system since we succeed to register the - CreateThreadNotifyRoutine: task_manager_notify */ - cfs_enter_debugger(); - - - /* cleanup all the taskslots attached to the list */ - spin_lock(&(TaskMan.Lock)); - - while (!IsListEmpty(&(TaskMan.TaskList))) { - - ListEntry = TaskMan.TaskList.Flink; - TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - - RemoveEntryList(ListEntry); - cleanup_task_slot(TaskSlot); - } - - spin_unlock(&TaskMan.Lock); - - /* destroy the taskslot cache slab */ - cfs_mem_cache_destroy(TaskMan.slab); - memset(&TaskMan, 0, 
sizeof(TASK_MAN)); -} - - -/* - * schedule routines (task slot list) - */ - - -cfs_task_t * -cfs_current() -{ - HANDLE Pid = PsGetCurrentProcessId(); - HANDLE Tid = PsGetCurrentThreadId(); - PETHREAD Tet = PsGetCurrentThread(); - - PLIST_ENTRY ListEntry = NULL; - PTASK_SLOT TaskSlot = NULL; - - spin_lock(&(TaskMan.Lock)); - - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { - - TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - - if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) { - if (TaskSlot->Tet != Tet) { - -/* - DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n", - Pid, Tid, Tet, TaskSlot->Tet); -*/ - // - // The old thread was already exit. This must be a - // new thread which get the same Tid to the previous. - // - - TaskSlot->Tet = Tet; - } - break; - - } else { - - if ((ULONG)TaskSlot->Pid > (ULONG)Pid) { - TaskSlot = NULL; - break; - } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) { - if ((ULONG)TaskSlot->Tid > (ULONG)Tid) { - TaskSlot = NULL; - break; - } - } - - TaskSlot = NULL; - } - - ListEntry = ListEntry->Flink; - } - - if (!TaskSlot) { - - TaskSlot = alloc_task_slot(); - - if (!TaskSlot) { - cfs_enter_debugger(); - goto errorout; - } - - init_task_slot(TaskSlot); - - TaskSlot->Pid = Pid; - TaskSlot->Tid = Tid; - TaskSlot->Tet = Tet; - - if (ListEntry == (&(TaskMan.TaskList))) { - // - // Empty case or the biggest case, put it to the tail. - // - InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link)); - } else { - // - // Get a slot and smaller than it's tid, put it just before. - // - InsertHeadList(ListEntry->Blink, &(TaskSlot->Link)); - } - - TaskMan.NumOfTasks++; - } - - // - // To Check whether he task structures are arranged in the expected order ? 
- // - - { - PTASK_SLOT Prev = NULL, Curr = NULL; - - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { - - Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - ListEntry = ListEntry->Flink; - - if (Prev) { - if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) { - cfs_enter_debugger(); - } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) { - if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) { - cfs_enter_debugger(); - } - } - } - - Prev = Curr; - } - } - -errorout: - - spin_unlock(&(TaskMan.Lock)); - - if (!TaskSlot) { - cfs_enter_debugger(); - return NULL; - } - - return (&(TaskSlot->task)); -} - -int -schedule_timeout(int64_t time) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - cfs_enter_debugger(); - return 0; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - - if (time == MAX_SCHEDULE_TIMEOUT) { - time = 0; - } - - return (cfs_wait_event(&(slot->Event), time) != 0); -} - -int -schedule() -{ - return schedule_timeout(0); -} - -int -wake_up_process( - cfs_task_t * task - ) -{ - PTASK_SLOT slot = NULL; - - if (!task) { - cfs_enter_debugger(); - return 0; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - - cfs_wake_event(&(slot->Event)); - - return TRUE; -} - -void -sleep_on( - cfs_waitq_t *waitq - ) -{ - cfs_waitlink_t link; - - cfs_waitlink_init(&link); - cfs_waitq_add(waitq, &link); - cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE); - cfs_waitq_del(waitq, &link); -} - -EXPORT_SYMBOL(cfs_curproc_uid); -EXPORT_SYMBOL(cfs_curproc_pid); -EXPORT_SYMBOL(cfs_curproc_gid); -EXPORT_SYMBOL(cfs_curproc_fsuid); -EXPORT_SYMBOL(cfs_curproc_fsgid); -EXPORT_SYMBOL(cfs_curproc_umask); -EXPORT_SYMBOL(cfs_curproc_comm); -EXPORT_SYMBOL(cfs_curproc_groups_nr); -EXPORT_SYMBOL(cfs_curproc_groups_dump); -EXPORT_SYMBOL(cfs_curproc_is_in_groups); -EXPORT_SYMBOL(cfs_curproc_cap_get); -EXPORT_SYMBOL(cfs_curproc_cap_set); diff --git 
a/lnet/libcfs/winnt/winnt-debug.c b/lnet/libcfs/winnt/winnt-debug.c deleted file mode 100644 index 9e94f845905ccf0dabc9f43bb8b78269f5f011ad..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-debug.c +++ /dev/null @@ -1,1057 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -void lnet_debug_dumpstack(cfs_task_t *tsk) -{ - return; -} - -cfs_task_t *lnet_current(void) -{ - return cfs_current(); -} - -int lnet_arch_debug_init(unsigned long bufsize) -{ - return 0; -} - -int lnet_arch_debug_cleanup(void) -{ - return 0; -} - -void lnet_run_lbug_upcall(char *file, const char *fn, const int line) -{ -} - -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - CEMERG("LBUG: pid: %u thread: %#x\n", - (unsigned)cfs_curproc_pid(), (unsigned)PsGetCurrentThread()); - // portals_debug_dumplog(); - // portals_run_lbug_upcall(file, func, line); -} - -#if TDI_LIBCFS_DBG - -/* - * Definitions - */ - -LONG KsDebugLevel = 0x5; - - -/* - * Routines - */ - - -/* - * KsNtStatusToString - * Get the error message for a specified nt status - * - * Arguments: - * Status - nt status code - * - * Return Value: - * PUCHAR - message string for the status code - * - * NOTES: - * N/A - */ - -PUCHAR -KsNtStatusToString (IN NTSTATUS Status) -{ - switch (Status) { - - case 0x00000000: return "STATUS_SUCCESS"; - case 0x00000001: return "STATUS_WAIT_1"; - case 0x00000002: return "STATUS_WAIT_2"; - case 0x00000003: return "STATUS_WAIT_3"; - case 0x0000003F: return "STATUS_WAIT_63"; - case 0x00000080: return "STATUS_ABANDONED_WAIT_0"; - case 0x000000BF: return "STATUS_ABANDONED_WAIT_63"; - case 0x000000C0: return "STATUS_USER_APC"; - case 0x00000100: return "STATUS_KERNEL_APC"; - case 0x00000101: return "STATUS_ALERTED"; - case 0x00000102: return "STATUS_TIMEOUT"; - case 0x00000103: return "STATUS_PENDING"; - case 0x00000104: return "STATUS_REPARSE"; - case 0x00000105: return "STATUS_MORE_ENTRIES"; - case 0x00000106: return "STATUS_NOT_ALL_ASSIGNED"; - case 0x00000107: return "STATUS_SOME_NOT_MAPPED"; - case 0x00000108: return "STATUS_OPLOCK_BREAK_IN_PROGRESS"; - case 0x00000109: return "STATUS_VOLUME_MOUNTED"; - 
case 0x0000010A: return "STATUS_RXACT_COMMITTED"; - case 0x0000010B: return "STATUS_NOTIFY_CLEANUP"; - case 0x0000010C: return "STATUS_NOTIFY_ENUM_DIR"; - case 0x0000010D: return "STATUS_NO_QUOTAS_FOR_ACCOUNT"; - case 0x0000010E: return "STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED"; - case 0x00000110: return "STATUS_PAGE_FAULT_TRANSITION"; - case 0x00000111: return "STATUS_PAGE_FAULT_DEMAND_ZERO"; - case 0x00000112: return "STATUS_PAGE_FAULT_COPY_ON_WRITE"; - case 0x00000113: return "STATUS_PAGE_FAULT_GUARD_PAGE"; - case 0x00000114: return "STATUS_PAGE_FAULT_PAGING_FILE"; - case 0x00000115: return "STATUS_CACHE_PAGE_LOCKED"; - case 0x00000116: return "STATUS_CRASH_DUMP"; - case 0x00000117: return "STATUS_BUFFER_ALL_ZEROS"; - case 0x00000118: return "STATUS_REPARSE_OBJECT"; - case 0x00000119: return "STATUS_RESOURCE_REQUIREMENTS_CHANGED"; - case 0x00000120: return "STATUS_TRANSLATION_COMPLETE"; - case 0x00000121: return "STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY"; - case 0x00010001: return "DBG_EXCEPTION_HANDLED"; - case 0x00010002: return "DBG_CONTINUE"; - case 0x40000000: return "STATUS_OBJECT_NAME_EXISTS"; - case 0x40000001: return "STATUS_THREAD_WAS_SUSPENDED"; - case 0x40000002: return "STATUS_WORKING_SET_LIMIT_RANGE"; - case 0x40000003: return "STATUS_IMAGE_NOT_AT_BASE"; - case 0x40000004: return "STATUS_RXACT_STATE_CREATED"; - case 0x40000005: return "STATUS_SEGMENT_NOTIFICATION"; - case 0x40000006: return "STATUS_LOCAL_USER_SESSION_KEY"; - case 0x40000007: return "STATUS_BAD_CURRENT_DIRECTORY"; - case 0x40000008: return "STATUS_SERIAL_MORE_WRITES"; - case 0x40000009: return "STATUS_REGISTRY_RECOVERED"; - case 0x4000000A: return "STATUS_FT_READ_RECOVERY_FROM_BACKUP"; - case 0x4000000B: return "STATUS_FT_WRITE_RECOVERY"; - case 0x4000000C: return "STATUS_SERIAL_COUNTER_TIMEOUT"; - case 0x4000000D: return "STATUS_NULL_LM_PASSWORD"; - case 0x4000000E: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH"; - case 0x4000000F: return "STATUS_RECEIVE_PARTIAL"; - case 0x40000010: 
return "STATUS_RECEIVE_EXPEDITED"; - case 0x40000011: return "STATUS_RECEIVE_PARTIAL_EXPEDITED"; - case 0x40000012: return "STATUS_EVENT_DONE"; - case 0x40000013: return "STATUS_EVENT_PENDING"; - case 0x40000014: return "STATUS_CHECKING_FILE_SYSTEM"; - case 0x40000015: return "STATUS_FATAL_APP_EXIT"; - case 0x40000016: return "STATUS_PREDEFINED_HANDLE"; - case 0x40000017: return "STATUS_WAS_UNLOCKED"; - case 0x40000018: return "STATUS_SERVICE_NOTIFICATION"; - case 0x40000019: return "STATUS_WAS_LOCKED"; - case 0x4000001A: return "STATUS_LOG_HARD_ERROR"; - case 0x4000001B: return "STATUS_ALREADY_WIN32"; - case 0x4000001C: return "STATUS_WX86_UNSIMULATE"; - case 0x4000001D: return "STATUS_WX86_CONTINUE"; - case 0x4000001E: return "STATUS_WX86_SINGLE_STEP"; - case 0x4000001F: return "STATUS_WX86_BREAKPOINT"; - case 0x40000020: return "STATUS_WX86_EXCEPTION_CONTINUE"; - case 0x40000021: return "STATUS_WX86_EXCEPTION_LASTCHANCE"; - case 0x40000022: return "STATUS_WX86_EXCEPTION_CHAIN"; - case 0x40000023: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE"; - case 0x40000024: return "STATUS_NO_YIELD_PERFORMED"; - case 0x40000025: return "STATUS_TIMER_RESUME_IGNORED"; - case 0x40000026: return "STATUS_ARBITRATION_UNHANDLED"; - case 0x40000027: return "STATUS_CARDBUS_NOT_SUPPORTED"; - case 0x40000028: return "STATUS_WX86_CREATEWX86TIB"; - case 0x40000029: return "STATUS_MP_PROCESSOR_MISMATCH"; - case 0x40010001: return "DBG_REPLY_LATER"; - case 0x40010002: return "DBG_UNABLE_TO_PROVIDE_HANDLE"; - case 0x40010003: return "DBG_TERMINATE_THREAD"; - case 0x40010004: return "DBG_TERMINATE_PROCESS"; - case 0x40010005: return "DBG_CONTROL_C"; - case 0x40010006: return "DBG_PRINTEXCEPTION_C"; - case 0x40010007: return "DBG_RIPEXCEPTION"; - case 0x40010008: return "DBG_CONTROL_BREAK"; - case 0x80000001: return "STATUS_GUARD_PAGE_VIOLATION"; - case 0x80000002: return "STATUS_DATATYPE_MISALIGNMENT"; - case 0x80000003: return "STATUS_BREAKPOINT"; - case 0x80000004: return 
"STATUS_SINGLE_STEP"; - case 0x80000005: return "STATUS_BUFFER_OVERFLOW"; - case 0x80000006: return "STATUS_NO_MORE_FILES"; - case 0x80000007: return "STATUS_WAKE_SYSTEM_DEBUGGER"; - case 0x8000000A: return "STATUS_HANDLES_CLOSED"; - case 0x8000000B: return "STATUS_NO_INHERITANCE"; - case 0x8000000C: return "STATUS_GUID_SUBSTITUTION_MADE"; - case 0x8000000D: return "STATUS_PARTIAL_COPY"; - case 0x8000000E: return "STATUS_DEVICE_PAPER_EMPTY"; - case 0x8000000F: return "STATUS_DEVICE_POWERED_OFF"; - case 0x80000010: return "STATUS_DEVICE_OFF_LINE"; - case 0x80000011: return "STATUS_DEVICE_BUSY"; - case 0x80000012: return "STATUS_NO_MORE_EAS"; - case 0x80000013: return "STATUS_INVALID_EA_NAME"; - case 0x80000014: return "STATUS_EA_LIST_INCONSISTENT"; - case 0x80000015: return "STATUS_INVALID_EA_FLAG"; - case 0x80000016: return "STATUS_VERIFY_REQUIRED"; - case 0x80000017: return "STATUS_EXTRANEOUS_INFORMATION"; - case 0x80000018: return "STATUS_RXACT_COMMIT_NECESSARY"; - case 0x8000001A: return "STATUS_NO_MORE_ENTRIES"; - case 0x8000001B: return "STATUS_FILEMARK_DETECTED"; - case 0x8000001C: return "STATUS_MEDIA_CHANGED"; - case 0x8000001D: return "STATUS_BUS_RESET"; - case 0x8000001E: return "STATUS_END_OF_MEDIA"; - case 0x8000001F: return "STATUS_BEGINNING_OF_MEDIA"; - case 0x80000020: return "STATUS_MEDIA_CHECK"; - case 0x80000021: return "STATUS_SETMARK_DETECTED"; - case 0x80000022: return "STATUS_NO_DATA_DETECTED"; - case 0x80000023: return "STATUS_REDIRECTOR_HAS_OPEN_HANDLES"; - case 0x80000024: return "STATUS_SERVER_HAS_OPEN_HANDLES"; - case 0x80000025: return "STATUS_ALREADY_DISCONNECTED"; - case 0x80000026: return "STATUS_LONGJUMP"; - case 0x80010001: return "DBG_EXCEPTION_NOT_HANDLED"; - case 0xC0000001: return "STATUS_UNSUCCESSFUL"; - case 0xC0000002: return "STATUS_NOT_IMPLEMENTED"; - case 0xC0000003: return "STATUS_INVALID_INFO_CLASS"; - case 0xC0000004: return "STATUS_INFO_LENGTH_MISMATCH"; - case 0xC0000005: return "STATUS_ACCESS_VIOLATION"; - case 
0xC0000006: return "STATUS_IN_PAGE_ERROR"; - case 0xC0000007: return "STATUS_PAGEFILE_QUOTA"; - case 0xC0000008: return "STATUS_INVALID_HANDLE"; - case 0xC0000009: return "STATUS_BAD_INITIAL_STACK"; - case 0xC000000A: return "STATUS_BAD_INITIAL_PC"; - case 0xC000000B: return "STATUS_INVALID_CID"; - case 0xC000000C: return "STATUS_TIMER_NOT_CANCELED"; - case 0xC000000D: return "STATUS_INVALID_PARAMETER"; - case 0xC000000E: return "STATUS_NO_SUCH_DEVICE"; - case 0xC000000F: return "STATUS_NO_SUCH_FILE"; - case 0xC0000010: return "STATUS_INVALID_DEVICE_REQUEST"; - case 0xC0000011: return "STATUS_END_OF_FILE"; - case 0xC0000012: return "STATUS_WRONG_VOLUME"; - case 0xC0000013: return "STATUS_NO_MEDIA_IN_DEVICE"; - case 0xC0000014: return "STATUS_UNRECOGNIZED_MEDIA"; - case 0xC0000015: return "STATUS_NONEXISTENT_SECTOR"; - case 0xC0000016: return "STATUS_MORE_PROCESSING_REQUIRED"; - case 0xC0000017: return "STATUS_NO_MEMORY"; - case 0xC0000018: return "STATUS_CONFLICTING_ADDRESSES"; - case 0xC0000019: return "STATUS_NOT_MAPPED_VIEW"; - case 0xC000001A: return "STATUS_UNABLE_TO_FREE_VM"; - case 0xC000001B: return "STATUS_UNABLE_TO_DELETE_SECTION"; - case 0xC000001C: return "STATUS_INVALID_SYSTEM_SERVICE"; - case 0xC000001D: return "STATUS_ILLEGAL_INSTRUCTION"; - case 0xC000001E: return "STATUS_INVALID_LOCK_SEQUENCE"; - case 0xC000001F: return "STATUS_INVALID_VIEW_SIZE"; - case 0xC0000020: return "STATUS_INVALID_FILE_FOR_SECTION"; - case 0xC0000021: return "STATUS_ALREADY_COMMITTED"; - case 0xC0000022: return "STATUS_ACCESS_DENIED"; - case 0xC0000023: return "STATUS_BUFFER_TOO_SMALL"; - case 0xC0000024: return "STATUS_OBJECT_TYPE_MISMATCH"; - case 0xC0000025: return "STATUS_NONCONTINUABLE_EXCEPTION"; - case 0xC0000026: return "STATUS_INVALID_DISPOSITION"; - case 0xC0000027: return "STATUS_UNWIND"; - case 0xC0000028: return "STATUS_BAD_STACK"; - case 0xC0000029: return "STATUS_INVALID_UNWIND_TARGET"; - case 0xC000002A: return "STATUS_NOT_LOCKED"; - case 0xC000002B: return 
"STATUS_PARITY_ERROR"; - case 0xC000002C: return "STATUS_UNABLE_TO_DECOMMIT_VM"; - case 0xC000002D: return "STATUS_NOT_COMMITTED"; - case 0xC000002E: return "STATUS_INVALID_PORT_ATTRIBUTES"; - case 0xC000002F: return "STATUS_PORT_MESSAGE_TOO_LONG"; - case 0xC0000030: return "STATUS_INVALID_PARAMETER_MIX"; - case 0xC0000031: return "STATUS_INVALID_QUOTA_LOWER"; - case 0xC0000032: return "STATUS_DISK_CORRUPT_ERROR"; - case 0xC0000033: return "STATUS_OBJECT_NAME_INVALID"; - case 0xC0000034: return "STATUS_OBJECT_NAME_NOT_FOUND"; - case 0xC0000035: return "STATUS_OBJECT_NAME_COLLISION"; - case 0xC0000037: return "STATUS_PORT_DISCONNECTED"; - case 0xC0000038: return "STATUS_DEVICE_ALREADY_ATTACHED"; - case 0xC0000039: return "STATUS_OBJECT_PATH_INVALID"; - case 0xC000003A: return "STATUS_OBJECT_PATH_NOT_FOUND"; - case 0xC000003B: return "STATUS_OBJECT_PATH_SYNTAX_BAD"; - case 0xC000003C: return "STATUS_DATA_OVERRUN"; - case 0xC000003D: return "STATUS_DATA_LATE_ERROR"; - case 0xC000003E: return "STATUS_DATA_ERROR"; - case 0xC000003F: return "STATUS_CRC_ERROR"; - case 0xC0000040: return "STATUS_SECTION_TOO_BIG"; - case 0xC0000041: return "STATUS_PORT_CONNECTION_REFUSED"; - case 0xC0000042: return "STATUS_INVALID_PORT_HANDLE"; - case 0xC0000043: return "STATUS_SHARING_VIOLATION"; - case 0xC0000044: return "STATUS_QUOTA_EXCEEDED"; - case 0xC0000045: return "STATUS_INVALID_PAGE_PROTECTION"; - case 0xC0000046: return "STATUS_MUTANT_NOT_OWNED"; - case 0xC0000047: return "STATUS_SEMAPHORE_LIMIT_EXCEEDED"; - case 0xC0000048: return "STATUS_PORT_ALREADY_SET"; - case 0xC0000049: return "STATUS_SECTION_NOT_IMAGE"; - case 0xC000004A: return "STATUS_SUSPEND_COUNT_EXCEEDED"; - case 0xC000004B: return "STATUS_THREAD_IS_TERMINATING"; - case 0xC000004C: return "STATUS_BAD_WORKING_SET_LIMIT"; - case 0xC000004D: return "STATUS_INCOMPATIBLE_FILE_MAP"; - case 0xC000004E: return "STATUS_SECTION_PROTECTION"; - case 0xC000004F: return "STATUS_EAS_NOT_SUPPORTED"; - case 0xC0000050: return 
"STATUS_EA_TOO_LARGE"; - case 0xC0000051: return "STATUS_NONEXISTENT_EA_ENTRY"; - case 0xC0000052: return "STATUS_NO_EAS_ON_FILE"; - case 0xC0000053: return "STATUS_EA_CORRUPT_ERROR"; - case 0xC0000054: return "STATUS_FILE_LOCK_CONFLICT"; - case 0xC0000055: return "STATUS_LOCK_NOT_GRANTED"; - case 0xC0000056: return "STATUS_DELETE_PENDING"; - case 0xC0000057: return "STATUS_CTL_FILE_NOT_SUPPORTED"; - case 0xC0000058: return "STATUS_UNKNOWN_REVISION"; - case 0xC0000059: return "STATUS_REVISION_MISMATCH"; - case 0xC000005A: return "STATUS_INVALID_OWNER"; - case 0xC000005B: return "STATUS_INVALID_PRIMARY_GROUP"; - case 0xC000005C: return "STATUS_NO_IMPERSONATION_TOKEN"; - case 0xC000005D: return "STATUS_CANT_DISABLE_MANDATORY"; - case 0xC000005E: return "STATUS_NO_LOGON_SERVERS"; - case 0xC000005F: return "STATUS_NO_SUCH_LOGON_SESSION"; - case 0xC0000060: return "STATUS_NO_SUCH_PRIVILEGE"; - case 0xC0000061: return "STATUS_PRIVILEGE_NOT_HELD"; - case 0xC0000062: return "STATUS_INVALID_ACCOUNT_NAME"; - case 0xC0000063: return "STATUS_USER_EXISTS"; - case 0xC0000064: return "STATUS_NO_SUCH_USER"; - case 0xC0000065: return "STATUS_GROUP_EXISTS"; - case 0xC0000066: return "STATUS_NO_SUCH_GROUP"; - case 0xC0000067: return "STATUS_MEMBER_IN_GROUP"; - case 0xC0000068: return "STATUS_MEMBER_NOT_IN_GROUP"; - case 0xC0000069: return "STATUS_LAST_ADMIN"; - case 0xC000006A: return "STATUS_WRONG_PASSWORD"; - case 0xC000006B: return "STATUS_ILL_FORMED_PASSWORD"; - case 0xC000006C: return "STATUS_PASSWORD_RESTRICTION"; - case 0xC000006D: return "STATUS_LOGON_FAILURE"; - case 0xC000006E: return "STATUS_ACCOUNT_RESTRICTION"; - case 0xC000006F: return "STATUS_INVALID_LOGON_HOURS"; - case 0xC0000070: return "STATUS_INVALID_WORKSTATION"; - case 0xC0000071: return "STATUS_PASSWORD_EXPIRED"; - case 0xC0000072: return "STATUS_ACCOUNT_DISABLED"; - case 0xC0000073: return "STATUS_NONE_MAPPED"; - case 0xC0000074: return "STATUS_TOO_MANY_LUIDS_REQUESTED"; - case 0xC0000075: return 
"STATUS_LUIDS_EXHAUSTED"; - case 0xC0000076: return "STATUS_INVALID_SUB_AUTHORITY"; - case 0xC0000077: return "STATUS_INVALID_ACL"; - case 0xC0000078: return "STATUS_INVALID_SID"; - case 0xC0000079: return "STATUS_INVALID_SECURITY_DESCR"; - case 0xC000007A: return "STATUS_PROCEDURE_NOT_FOUND"; - case 0xC000007B: return "STATUS_INVALID_IMAGE_FORMAT"; - case 0xC000007C: return "STATUS_NO_TOKEN"; - case 0xC000007D: return "STATUS_BAD_INHERITANCE_ACL"; - case 0xC000007E: return "STATUS_RANGE_NOT_LOCKED"; - case 0xC000007F: return "STATUS_DISK_FULL"; - case 0xC0000080: return "STATUS_SERVER_DISABLED"; - case 0xC0000081: return "STATUS_SERVER_NOT_DISABLED"; - case 0xC0000082: return "STATUS_TOO_MANY_GUIDS_REQUESTED"; - case 0xC0000083: return "STATUS_GUIDS_EXHAUSTED"; - case 0xC0000084: return "STATUS_INVALID_ID_AUTHORITY"; - case 0xC0000085: return "STATUS_AGENTS_EXHAUSTED"; - case 0xC0000086: return "STATUS_INVALID_VOLUME_LABEL"; - case 0xC0000087: return "STATUS_SECTION_NOT_EXTENDED"; - case 0xC0000088: return "STATUS_NOT_MAPPED_DATA"; - case 0xC0000089: return "STATUS_RESOURCE_DATA_NOT_FOUND"; - case 0xC000008A: return "STATUS_RESOURCE_TYPE_NOT_FOUND"; - case 0xC000008B: return "STATUS_RESOURCE_NAME_NOT_FOUND"; - case 0xC000008C: return "STATUS_ARRAY_BOUNDS_EXCEEDED"; - case 0xC000008D: return "STATUS_FLOAT_DENORMAL_OPERAND"; - case 0xC000008E: return "STATUS_FLOAT_DIVIDE_BY_ZERO"; - case 0xC000008F: return "STATUS_FLOAT_INEXACT_RESULT"; - case 0xC0000090: return "STATUS_FLOAT_INVALID_OPERATION"; - case 0xC0000091: return "STATUS_FLOAT_OVERFLOW"; - case 0xC0000092: return "STATUS_FLOAT_STACK_CHECK"; - case 0xC0000093: return "STATUS_FLOAT_UNDERFLOW"; - case 0xC0000094: return "STATUS_INTEGER_DIVIDE_BY_ZERO"; - case 0xC0000095: return "STATUS_INTEGER_OVERFLOW"; - case 0xC0000096: return "STATUS_PRIVILEGED_INSTRUCTION"; - case 0xC0000097: return "STATUS_TOO_MANY_PAGING_FILES"; - case 0xC0000098: return "STATUS_FILE_INVALID"; - case 0xC0000099: return 
"STATUS_ALLOTTED_SPACE_EXCEEDED"; - case 0xC000009A: return "STATUS_INSUFFICIENT_RESOURCES"; - case 0xC000009B: return "STATUS_DFS_EXIT_PATH_FOUND"; - case 0xC000009C: return "STATUS_DEVICE_DATA_ERROR"; - case 0xC000009D: return "STATUS_DEVICE_NOT_CONNECTED"; - case 0xC000009E: return "STATUS_DEVICE_POWER_FAILURE"; - case 0xC000009F: return "STATUS_FREE_VM_NOT_AT_BASE"; - case 0xC00000A0: return "STATUS_MEMORY_NOT_ALLOCATED"; - case 0xC00000A1: return "STATUS_WORKING_SET_QUOTA"; - case 0xC00000A2: return "STATUS_MEDIA_WRITE_PROTECTED"; - case 0xC00000A3: return "STATUS_DEVICE_NOT_READY"; - case 0xC00000A4: return "STATUS_INVALID_GROUP_ATTRIBUTES"; - case 0xC00000A5: return "STATUS_BAD_IMPERSONATION_LEVEL"; - case 0xC00000A6: return "STATUS_CANT_OPEN_ANONYMOUS"; - case 0xC00000A7: return "STATUS_BAD_VALIDATION_CLASS"; - case 0xC00000A8: return "STATUS_BAD_TOKEN_TYPE"; - case 0xC00000A9: return "STATUS_BAD_MASTER_BOOT_RECORD"; - case 0xC00000AA: return "STATUS_INSTRUCTION_MISALIGNMENT"; - case 0xC00000AB: return "STATUS_INSTANCE_NOT_AVAILABLE"; - case 0xC00000AC: return "STATUS_PIPE_NOT_AVAILABLE"; - case 0xC00000AD: return "STATUS_INVALID_PIPE_STATE"; - case 0xC00000AE: return "STATUS_PIPE_BUSY"; - case 0xC00000AF: return "STATUS_ILLEGAL_FUNCTION"; - case 0xC00000B0: return "STATUS_PIPE_DISCONNECTED"; - case 0xC00000B1: return "STATUS_PIPE_CLOSING"; - case 0xC00000B2: return "STATUS_PIPE_CONNECTED"; - case 0xC00000B3: return "STATUS_PIPE_LISTENING"; - case 0xC00000B4: return "STATUS_INVALID_READ_MODE"; - case 0xC00000B5: return "STATUS_IO_TIMEOUT"; - case 0xC00000B6: return "STATUS_FILE_FORCED_CLOSED"; - case 0xC00000B7: return "STATUS_PROFILING_NOT_STARTED"; - case 0xC00000B8: return "STATUS_PROFILING_NOT_STOPPED"; - case 0xC00000B9: return "STATUS_COULD_NOT_INTERPRET"; - case 0xC00000BA: return "STATUS_FILE_IS_A_DIRECTORY"; - case 0xC00000BB: return "STATUS_NOT_SUPPORTED"; - case 0xC00000BC: return "STATUS_REMOTE_NOT_LISTENING"; - case 0xC00000BD: return 
"STATUS_DUPLICATE_NAME"; - case 0xC00000BE: return "STATUS_BAD_NETWORK_PATH"; - case 0xC00000BF: return "STATUS_NETWORK_BUSY"; - case 0xC00000C0: return "STATUS_DEVICE_DOES_NOT_EXIST"; - case 0xC00000C1: return "STATUS_TOO_MANY_COMMANDS"; - case 0xC00000C2: return "STATUS_ADAPTER_HARDWARE_ERROR"; - case 0xC00000C3: return "STATUS_INVALID_NETWORK_RESPONSE"; - case 0xC00000C4: return "STATUS_UNEXPECTED_NETWORK_ERROR"; - case 0xC00000C5: return "STATUS_BAD_REMOTE_ADAPTER"; - case 0xC00000C6: return "STATUS_PRINT_QUEUE_FULL"; - case 0xC00000C7: return "STATUS_NO_SPOOL_SPACE"; - case 0xC00000C8: return "STATUS_PRINT_CANCELLED"; - case 0xC00000C9: return "STATUS_NETWORK_NAME_DELETED"; - case 0xC00000CA: return "STATUS_NETWORK_ACCESS_DENIED"; - case 0xC00000CB: return "STATUS_BAD_DEVICE_TYPE"; - case 0xC00000CC: return "STATUS_BAD_NETWORK_NAME"; - case 0xC00000CD: return "STATUS_TOO_MANY_NAMES"; - case 0xC00000CE: return "STATUS_TOO_MANY_SESSIONS"; - case 0xC00000CF: return "STATUS_SHARING_PAUSED"; - case 0xC00000D0: return "STATUS_REQUEST_NOT_ACCEPTED"; - case 0xC00000D1: return "STATUS_REDIRECTOR_PAUSED"; - case 0xC00000D2: return "STATUS_NET_WRITE_FAULT"; - case 0xC00000D3: return "STATUS_PROFILING_AT_LIMIT"; - case 0xC00000D4: return "STATUS_NOT_SAME_DEVICE"; - case 0xC00000D5: return "STATUS_FILE_RENAMED"; - case 0xC00000D6: return "STATUS_VIRTUAL_CIRCUIT_CLOSED"; - case 0xC00000D7: return "STATUS_NO_SECURITY_ON_OBJECT"; - case 0xC00000D8: return "STATUS_CANT_WAIT"; - case 0xC00000D9: return "STATUS_PIPE_EMPTY"; - case 0xC00000DA: return "STATUS_CANT_ACCESS_DOMAIN_INFO"; - case 0xC00000DB: return "STATUS_CANT_TERMINATE_SELF"; - case 0xC00000DC: return "STATUS_INVALID_SERVER_STATE"; - case 0xC00000DD: return "STATUS_INVALID_DOMAIN_STATE"; - case 0xC00000DE: return "STATUS_INVALID_DOMAIN_ROLE"; - case 0xC00000DF: return "STATUS_NO_SUCH_DOMAIN"; - case 0xC00000E0: return "STATUS_DOMAIN_EXISTS"; - case 0xC00000E1: return "STATUS_DOMAIN_LIMIT_EXCEEDED"; - case 0xC00000E2: 
return "STATUS_OPLOCK_NOT_GRANTED"; - case 0xC00000E3: return "STATUS_INVALID_OPLOCK_PROTOCOL"; - case 0xC00000E4: return "STATUS_INTERNAL_DB_CORRUPTION"; - case 0xC00000E5: return "STATUS_INTERNAL_ERROR"; - case 0xC00000E6: return "STATUS_GENERIC_NOT_MAPPED"; - case 0xC00000E7: return "STATUS_BAD_DESCRIPTOR_FORMAT"; - case 0xC00000E8: return "STATUS_INVALID_USER_BUFFER"; - case 0xC00000E9: return "STATUS_UNEXPECTED_IO_ERROR"; - case 0xC00000EA: return "STATUS_UNEXPECTED_MM_CREATE_ERR"; - case 0xC00000EB: return "STATUS_UNEXPECTED_MM_MAP_ERROR"; - case 0xC00000EC: return "STATUS_UNEXPECTED_MM_EXTEND_ERR"; - case 0xC00000ED: return "STATUS_NOT_LOGON_PROCESS"; - case 0xC00000EE: return "STATUS_LOGON_SESSION_EXISTS"; - case 0xC00000EF: return "STATUS_INVALID_PARAMETER_1"; - case 0xC00000F0: return "STATUS_INVALID_PARAMETER_2"; - case 0xC00000F1: return "STATUS_INVALID_PARAMETER_3"; - case 0xC00000F2: return "STATUS_INVALID_PARAMETER_4"; - case 0xC00000F3: return "STATUS_INVALID_PARAMETER_5"; - case 0xC00000F4: return "STATUS_INVALID_PARAMETER_6"; - case 0xC00000F5: return "STATUS_INVALID_PARAMETER_7"; - case 0xC00000F6: return "STATUS_INVALID_PARAMETER_8"; - case 0xC00000F7: return "STATUS_INVALID_PARAMETER_9"; - case 0xC00000F8: return "STATUS_INVALID_PARAMETER_10"; - case 0xC00000F9: return "STATUS_INVALID_PARAMETER_11"; - case 0xC00000FA: return "STATUS_INVALID_PARAMETER_12"; - case 0xC00000FB: return "STATUS_REDIRECTOR_NOT_STARTED"; - case 0xC00000FC: return "STATUS_REDIRECTOR_STARTED"; - case 0xC00000FD: return "STATUS_STACK_OVERFLOW"; - case 0xC00000FE: return "STATUS_NO_SUCH_PACKAGE"; - case 0xC00000FF: return "STATUS_BAD_FUNCTION_TABLE"; - case 0xC0000100: return "STATUS_VARIABLE_NOT_FOUND"; - case 0xC0000101: return "STATUS_DIRECTORY_NOT_EMPTY"; - case 0xC0000102: return "STATUS_FILE_CORRUPT_ERROR"; - case 0xC0000103: return "STATUS_NOT_A_DIRECTORY"; - case 0xC0000104: return "STATUS_BAD_LOGON_SESSION_STATE"; - case 0xC0000105: return 
"STATUS_LOGON_SESSION_COLLISION"; - case 0xC0000106: return "STATUS_NAME_TOO_LONG"; - case 0xC0000107: return "STATUS_FILES_OPEN"; - case 0xC0000108: return "STATUS_CONNECTION_IN_USE"; - case 0xC0000109: return "STATUS_MESSAGE_NOT_FOUND"; - case 0xC000010A: return "STATUS_PROCESS_IS_TERMINATING"; - case 0xC000010B: return "STATUS_INVALID_LOGON_TYPE"; - case 0xC000010C: return "STATUS_NO_GUID_TRANSLATION"; - case 0xC000010D: return "STATUS_CANNOT_IMPERSONATE"; - case 0xC000010E: return "STATUS_IMAGE_ALREADY_LOADED"; - case 0xC000010F: return "STATUS_ABIOS_NOT_PRESENT"; - case 0xC0000110: return "STATUS_ABIOS_LID_NOT_EXIST"; - case 0xC0000111: return "STATUS_ABIOS_LID_ALREADY_OWNED"; - case 0xC0000112: return "STATUS_ABIOS_NOT_LID_OWNER"; - case 0xC0000113: return "STATUS_ABIOS_INVALID_COMMAND"; - case 0xC0000114: return "STATUS_ABIOS_INVALID_LID"; - case 0xC0000115: return "STATUS_ABIOS_SELECTOR_NOT_AVAILABLE"; - case 0xC0000116: return "STATUS_ABIOS_INVALID_SELECTOR"; - case 0xC0000117: return "STATUS_NO_LDT"; - case 0xC0000118: return "STATUS_INVALID_LDT_SIZE"; - case 0xC0000119: return "STATUS_INVALID_LDT_OFFSET"; - case 0xC000011A: return "STATUS_INVALID_LDT_DESCRIPTOR"; - case 0xC000011B: return "STATUS_INVALID_IMAGE_NE_FORMAT"; - case 0xC000011C: return "STATUS_RXACT_INVALID_STATE"; - case 0xC000011D: return "STATUS_RXACT_COMMIT_FAILURE"; - case 0xC000011E: return "STATUS_MAPPED_FILE_SIZE_ZERO"; - case 0xC000011F: return "STATUS_TOO_MANY_OPENED_FILES"; - case 0xC0000120: return "STATUS_CANCELLED"; - case 0xC0000121: return "STATUS_CANNOT_DELETE"; - case 0xC0000122: return "STATUS_INVALID_COMPUTER_NAME"; - case 0xC0000123: return "STATUS_FILE_DELETED"; - case 0xC0000124: return "STATUS_SPECIAL_ACCOUNT"; - case 0xC0000125: return "STATUS_SPECIAL_GROUP"; - case 0xC0000126: return "STATUS_SPECIAL_USER"; - case 0xC0000127: return "STATUS_MEMBERS_PRIMARY_GROUP"; - case 0xC0000128: return "STATUS_FILE_CLOSED"; - case 0xC0000129: return "STATUS_TOO_MANY_THREADS"; - 
case 0xC000012A: return "STATUS_THREAD_NOT_IN_PROCESS"; - case 0xC000012B: return "STATUS_TOKEN_ALREADY_IN_USE"; - case 0xC000012C: return "STATUS_PAGEFILE_QUOTA_EXCEEDED"; - case 0xC000012D: return "STATUS_COMMITMENT_LIMIT"; - case 0xC000012E: return "STATUS_INVALID_IMAGE_LE_FORMAT"; - case 0xC000012F: return "STATUS_INVALID_IMAGE_NOT_MZ"; - case 0xC0000130: return "STATUS_INVALID_IMAGE_PROTECT"; - case 0xC0000131: return "STATUS_INVALID_IMAGE_WIN_16"; - case 0xC0000132: return "STATUS_LOGON_SERVER_CONFLICT"; - case 0xC0000133: return "STATUS_TIME_DIFFERENCE_AT_DC"; - case 0xC0000134: return "STATUS_SYNCHRONIZATION_REQUIRED"; - case 0xC0000135: return "STATUS_DLL_NOT_FOUND"; - case 0xC0000136: return "STATUS_OPEN_FAILED"; - case 0xC0000137: return "STATUS_IO_PRIVILEGE_FAILED"; - case 0xC0000138: return "STATUS_ORDINAL_NOT_FOUND"; - case 0xC0000139: return "STATUS_ENTRYPOINT_NOT_FOUND"; - case 0xC000013A: return "STATUS_CONTROL_C_EXIT"; - case 0xC000013B: return "STATUS_LOCAL_DISCONNECT"; - case 0xC000013C: return "STATUS_REMOTE_DISCONNECT"; - case 0xC000013D: return "STATUS_REMOTE_RESOURCES"; - case 0xC000013E: return "STATUS_LINK_FAILED"; - case 0xC000013F: return "STATUS_LINK_TIMEOUT"; - case 0xC0000140: return "STATUS_INVALID_CONNECTION"; - case 0xC0000141: return "STATUS_INVALID_ADDRESS"; - case 0xC0000142: return "STATUS_DLL_INIT_FAILED"; - case 0xC0000143: return "STATUS_MISSING_SYSTEMFILE"; - case 0xC0000144: return "STATUS_UNHANDLED_EXCEPTION"; - case 0xC0000145: return "STATUS_APP_INIT_FAILURE"; - case 0xC0000146: return "STATUS_PAGEFILE_CREATE_FAILED"; - case 0xC0000147: return "STATUS_NO_PAGEFILE"; - case 0xC0000148: return "STATUS_INVALID_LEVEL"; - case 0xC0000149: return "STATUS_WRONG_PASSWORD_CORE"; - case 0xC000014A: return "STATUS_ILLEGAL_FLOAT_CONTEXT"; - case 0xC000014B: return "STATUS_PIPE_BROKEN"; - case 0xC000014C: return "STATUS_REGISTRY_CORRUPT"; - case 0xC000014D: return "STATUS_REGISTRY_IO_FAILED"; - case 0xC000014E: return 
"STATUS_NO_EVENT_PAIR"; - case 0xC000014F: return "STATUS_UNRECOGNIZED_VOLUME"; - case 0xC0000150: return "STATUS_SERIAL_NO_DEVICE_INITED"; - case 0xC0000151: return "STATUS_NO_SUCH_ALIAS"; - case 0xC0000152: return "STATUS_MEMBER_NOT_IN_ALIAS"; - case 0xC0000153: return "STATUS_MEMBER_IN_ALIAS"; - case 0xC0000154: return "STATUS_ALIAS_EXISTS"; - case 0xC0000155: return "STATUS_LOGON_NOT_GRANTED"; - case 0xC0000156: return "STATUS_TOO_MANY_SECRETS"; - case 0xC0000157: return "STATUS_SECRET_TOO_LONG"; - case 0xC0000158: return "STATUS_INTERNAL_DB_ERROR"; - case 0xC0000159: return "STATUS_FULLSCREEN_MODE"; - case 0xC000015A: return "STATUS_TOO_MANY_CONTEXT_IDS"; - case 0xC000015B: return "STATUS_LOGON_TYPE_NOT_GRANTED"; - case 0xC000015C: return "STATUS_NOT_REGISTRY_FILE"; - case 0xC000015D: return "STATUS_NT_CROSS_ENCRYPTION_REQUIRED"; - case 0xC000015E: return "STATUS_DOMAIN_CTRLR_CONFIG_ERROR"; - case 0xC000015F: return "STATUS_FT_MISSING_MEMBER"; - case 0xC0000160: return "STATUS_ILL_FORMED_SERVICE_ENTRY"; - case 0xC0000161: return "STATUS_ILLEGAL_CHARACTER"; - case 0xC0000162: return "STATUS_UNMAPPABLE_CHARACTER"; - case 0xC0000163: return "STATUS_UNDEFINED_CHARACTER"; - case 0xC0000164: return "STATUS_FLOPPY_VOLUME"; - case 0xC0000165: return "STATUS_FLOPPY_ID_MARK_NOT_FOUND"; - case 0xC0000166: return "STATUS_FLOPPY_WRONG_CYLINDER"; - case 0xC0000167: return "STATUS_FLOPPY_UNKNOWN_ERROR"; - case 0xC0000168: return "STATUS_FLOPPY_BAD_REGISTERS"; - case 0xC0000169: return "STATUS_DISK_RECALIBRATE_FAILED"; - case 0xC000016A: return "STATUS_DISK_OPERATION_FAILED"; - case 0xC000016B: return "STATUS_DISK_RESET_FAILED"; - case 0xC000016C: return "STATUS_SHARED_IRQ_BUSY"; - case 0xC000016D: return "STATUS_FT_ORPHANING"; - case 0xC000016E: return "STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT"; - case 0xC0000172: return "STATUS_PARTITION_FAILURE"; - case 0xC0000173: return "STATUS_INVALID_BLOCK_LENGTH"; - case 0xC0000174: return "STATUS_DEVICE_NOT_PARTITIONED"; - case 
0xC0000175: return "STATUS_UNABLE_TO_LOCK_MEDIA"; - case 0xC0000176: return "STATUS_UNABLE_TO_UNLOAD_MEDIA"; - case 0xC0000177: return "STATUS_EOM_OVERFLOW"; - case 0xC0000178: return "STATUS_NO_MEDIA"; - case 0xC000017A: return "STATUS_NO_SUCH_MEMBER"; - case 0xC000017B: return "STATUS_INVALID_MEMBER"; - case 0xC000017C: return "STATUS_KEY_DELETED"; - case 0xC000017D: return "STATUS_NO_LOG_SPACE"; - case 0xC000017E: return "STATUS_TOO_MANY_SIDS"; - case 0xC000017F: return "STATUS_LM_CROSS_ENCRYPTION_REQUIRED"; - case 0xC0000180: return "STATUS_KEY_HAS_CHILDREN"; - case 0xC0000181: return "STATUS_CHILD_MUST_BE_VOLATILE"; - case 0xC0000182: return "STATUS_DEVICE_CONFIGURATION_ERROR"; - case 0xC0000183: return "STATUS_DRIVER_INTERNAL_ERROR"; - case 0xC0000184: return "STATUS_INVALID_DEVICE_STATE"; - case 0xC0000185: return "STATUS_IO_DEVICE_ERROR"; - case 0xC0000186: return "STATUS_DEVICE_PROTOCOL_ERROR"; - case 0xC0000187: return "STATUS_BACKUP_CONTROLLER"; - case 0xC0000188: return "STATUS_LOG_FILE_FULL"; - case 0xC0000189: return "STATUS_TOO_LATE"; - case 0xC000018A: return "STATUS_NO_TRUST_LSA_SECRET"; - case 0xC000018B: return "STATUS_NO_TRUST_SAM_ACCOUNT"; - case 0xC000018C: return "STATUS_TRUSTED_DOMAIN_FAILURE"; - case 0xC000018D: return "STATUS_TRUSTED_RELATIONSHIP_FAILURE"; - case 0xC000018E: return "STATUS_EVENTLOG_FILE_CORRUPT"; - case 0xC000018F: return "STATUS_EVENTLOG_CANT_START"; - case 0xC0000190: return "STATUS_TRUST_FAILURE"; - case 0xC0000191: return "STATUS_MUTANT_LIMIT_EXCEEDED"; - case 0xC0000192: return "STATUS_NETLOGON_NOT_STARTED"; - case 0xC0000193: return "STATUS_ACCOUNT_EXPIRED"; - case 0xC0000194: return "STATUS_POSSIBLE_DEADLOCK"; - case 0xC0000195: return "STATUS_NETWORK_CREDENTIAL_CONFLICT"; - case 0xC0000196: return "STATUS_REMOTE_SESSION_LIMIT"; - case 0xC0000197: return "STATUS_EVENTLOG_FILE_CHANGED"; - case 0xC0000198: return "STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT"; - case 0xC0000199: return 
"STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT"; - case 0xC000019A: return "STATUS_NOLOGON_SERVER_TRUST_ACCOUNT"; - case 0xC000019B: return "STATUS_DOMAIN_TRUST_INCONSISTENT"; - case 0xC000019C: return "STATUS_FS_DRIVER_REQUIRED"; - case 0xC0000202: return "STATUS_NO_USER_SESSION_KEY"; - case 0xC0000203: return "STATUS_USER_SESSION_DELETED"; - case 0xC0000204: return "STATUS_RESOURCE_LANG_NOT_FOUND"; - case 0xC0000205: return "STATUS_INSUFF_SERVER_RESOURCES"; - case 0xC0000206: return "STATUS_INVALID_BUFFER_SIZE"; - case 0xC0000207: return "STATUS_INVALID_ADDRESS_COMPONENT"; - case 0xC0000208: return "STATUS_INVALID_ADDRESS_WILDCARD"; - case 0xC0000209: return "STATUS_TOO_MANY_ADDRESSES"; - case 0xC000020A: return "STATUS_ADDRESS_ALREADY_EXISTS"; - case 0xC000020B: return "STATUS_ADDRESS_CLOSED"; - case 0xC000020C: return "STATUS_CONNECTION_DISCONNECTED"; - case 0xC000020D: return "STATUS_CONNECTION_RESET"; - case 0xC000020E: return "STATUS_TOO_MANY_NODES"; - case 0xC000020F: return "STATUS_TRANSACTION_ABORTED"; - case 0xC0000210: return "STATUS_TRANSACTION_TIMED_OUT"; - case 0xC0000211: return "STATUS_TRANSACTION_NO_RELEASE"; - case 0xC0000212: return "STATUS_TRANSACTION_NO_MATCH"; - case 0xC0000213: return "STATUS_TRANSACTION_RESPONDED"; - case 0xC0000214: return "STATUS_TRANSACTION_INVALID_ID"; - case 0xC0000215: return "STATUS_TRANSACTION_INVALID_TYPE"; - case 0xC0000216: return "STATUS_NOT_SERVER_SESSION"; - case 0xC0000217: return "STATUS_NOT_CLIENT_SESSION"; - case 0xC0000218: return "STATUS_CANNOT_LOAD_REGISTRY_FILE"; - case 0xC0000219: return "STATUS_DEBUG_ATTACH_FAILED"; - case 0xC000021A: return "STATUS_SYSTEM_PROCESS_TERMINATED"; - case 0xC000021B: return "STATUS_DATA_NOT_ACCEPTED"; - case 0xC000021C: return "STATUS_NO_BROWSER_SERVERS_FOUND"; - case 0xC000021D: return "STATUS_VDM_HARD_ERROR"; - case 0xC000021E: return "STATUS_DRIVER_CANCEL_TIMEOUT"; - case 0xC000021F: return "STATUS_REPLY_MESSAGE_MISMATCH"; - case 0xC0000220: return 
"STATUS_MAPPED_ALIGNMENT"; - case 0xC0000221: return "STATUS_IMAGE_CHECKSUM_MISMATCH"; - case 0xC0000222: return "STATUS_LOST_WRITEBEHIND_DATA"; - case 0xC0000223: return "STATUS_CLIENT_SERVER_PARAMETERS_INVALID"; - case 0xC0000224: return "STATUS_PASSWORD_MUST_CHANGE"; - case 0xC0000225: return "STATUS_NOT_FOUND"; - case 0xC0000226: return "STATUS_NOT_TINY_STREAM"; - case 0xC0000227: return "STATUS_RECOVERY_FAILURE"; - case 0xC0000228: return "STATUS_STACK_OVERFLOW_READ"; - case 0xC0000229: return "STATUS_FAIL_CHECK"; - case 0xC000022A: return "STATUS_DUPLICATE_OBJECTID"; - case 0xC000022B: return "STATUS_OBJECTID_EXISTS"; - case 0xC000022C: return "STATUS_CONVERT_TO_LARGE"; - case 0xC000022D: return "STATUS_RETRY"; - case 0xC000022E: return "STATUS_FOUND_OUT_OF_SCOPE"; - case 0xC000022F: return "STATUS_ALLOCATE_BUCKET"; - case 0xC0000230: return "STATUS_PROPSET_NOT_FOUND"; - case 0xC0000231: return "STATUS_MARSHALL_OVERFLOW"; - case 0xC0000232: return "STATUS_INVALID_VARIANT"; - case 0xC0000233: return "STATUS_DOMAIN_CONTROLLER_NOT_FOUND"; - case 0xC0000234: return "STATUS_ACCOUNT_LOCKED_OUT"; - case 0xC0000235: return "STATUS_HANDLE_NOT_CLOSABLE"; - case 0xC0000236: return "STATUS_CONNECTION_REFUSED"; - case 0xC0000237: return "STATUS_GRACEFUL_DISCONNECT"; - case 0xC0000238: return "STATUS_ADDRESS_ALREADY_ASSOCIATED"; - case 0xC0000239: return "STATUS_ADDRESS_NOT_ASSOCIATED"; - case 0xC000023A: return "STATUS_CONNECTION_INVALID"; - case 0xC000023B: return "STATUS_CONNECTION_ACTIVE"; - case 0xC000023C: return "STATUS_NETWORK_UNREACHABLE"; - case 0xC000023D: return "STATUS_HOST_UNREACHABLE"; - case 0xC000023E: return "STATUS_PROTOCOL_UNREACHABLE"; - case 0xC000023F: return "STATUS_PORT_UNREACHABLE"; - case 0xC0000240: return "STATUS_REQUEST_ABORTED"; - case 0xC0000241: return "STATUS_CONNECTION_ABORTED"; - case 0xC0000242: return "STATUS_BAD_COMPRESSION_BUFFER"; - case 0xC0000243: return "STATUS_USER_MAPPED_FILE"; - case 0xC0000244: return "STATUS_AUDIT_FAILED"; - 
case 0xC0000245: return "STATUS_TIMER_RESOLUTION_NOT_SET"; - case 0xC0000246: return "STATUS_CONNECTION_COUNT_LIMIT"; - case 0xC0000247: return "STATUS_LOGIN_TIME_RESTRICTION"; - case 0xC0000248: return "STATUS_LOGIN_WKSTA_RESTRICTION"; - case 0xC0000249: return "STATUS_IMAGE_MP_UP_MISMATCH"; - case 0xC0000250: return "STATUS_INSUFFICIENT_LOGON_INFO"; - case 0xC0000251: return "STATUS_BAD_DLL_ENTRYPOINT"; - case 0xC0000252: return "STATUS_BAD_SERVICE_ENTRYPOINT"; - case 0xC0000253: return "STATUS_LPC_REPLY_LOST"; - case 0xC0000254: return "STATUS_IP_ADDRESS_CONFLICT1"; - case 0xC0000255: return "STATUS_IP_ADDRESS_CONFLICT2"; - case 0xC0000256: return "STATUS_REGISTRY_QUOTA_LIMIT"; - case 0xC0000257: return "STATUS_PATH_NOT_COVERED"; - case 0xC0000258: return "STATUS_NO_CALLBACK_ACTIVE"; - case 0xC0000259: return "STATUS_LICENSE_QUOTA_EXCEEDED"; - case 0xC000025A: return "STATUS_PWD_TOO_SHORT"; - case 0xC000025B: return "STATUS_PWD_TOO_RECENT"; - case 0xC000025C: return "STATUS_PWD_HISTORY_CONFLICT"; - case 0xC000025E: return "STATUS_PLUGPLAY_NO_DEVICE"; - case 0xC000025F: return "STATUS_UNSUPPORTED_COMPRESSION"; - case 0xC0000260: return "STATUS_INVALID_HW_PROFILE"; - case 0xC0000261: return "STATUS_INVALID_PLUGPLAY_DEVICE_PATH"; - case 0xC0000262: return "STATUS_DRIVER_ORDINAL_NOT_FOUND"; - case 0xC0000263: return "STATUS_DRIVER_ENTRYPOINT_NOT_FOUND"; - case 0xC0000264: return "STATUS_RESOURCE_NOT_OWNED"; - case 0xC0000265: return "STATUS_TOO_MANY_LINKS"; - case 0xC0000266: return "STATUS_QUOTA_LIST_INCONSISTENT"; - case 0xC0000267: return "STATUS_FILE_IS_OFFLINE"; - case 0xC0000268: return "STATUS_EVALUATION_EXPIRATION"; - case 0xC0000269: return "STATUS_ILLEGAL_DLL_RELOCATION"; - case 0xC000026A: return "STATUS_LICENSE_VIOLATION"; - case 0xC000026B: return "STATUS_DLL_INIT_FAILED_LOGOFF"; - case 0xC000026C: return "STATUS_DRIVER_UNABLE_TO_LOAD"; - case 0xC000026D: return "STATUS_DFS_UNAVAILABLE"; - case 0xC000026E: return "STATUS_VOLUME_DISMOUNTED"; - case 
0xC000026F: return "STATUS_WX86_INTERNAL_ERROR"; - case 0xC0000270: return "STATUS_WX86_FLOAT_STACK_CHECK"; - case 0xC0000271: return "STATUS_VALIDATE_CONTINUE"; - case 0xC0000272: return "STATUS_NO_MATCH"; - case 0xC0000273: return "STATUS_NO_MORE_MATCHES"; - case 0xC0000275: return "STATUS_NOT_A_REPARSE_POINT"; - case 0xC0000276: return "STATUS_IO_REPARSE_TAG_INVALID"; - case 0xC0000277: return "STATUS_IO_REPARSE_TAG_MISMATCH"; - case 0xC0000278: return "STATUS_IO_REPARSE_DATA_INVALID"; - case 0xC0000279: return "STATUS_IO_REPARSE_TAG_NOT_HANDLED"; - case 0xC0000280: return "STATUS_REPARSE_POINT_NOT_RESOLVED"; - case 0xC0000281: return "STATUS_DIRECTORY_IS_A_REPARSE_POINT"; - case 0xC0000282: return "STATUS_RANGE_LIST_CONFLICT"; - case 0xC0000283: return "STATUS_SOURCE_ELEMENT_EMPTY"; - case 0xC0000284: return "STATUS_DESTINATION_ELEMENT_FULL"; - case 0xC0000285: return "STATUS_ILLEGAL_ELEMENT_ADDRESS"; - case 0xC0000286: return "STATUS_MAGAZINE_NOT_PRESENT"; - case 0xC0000287: return "STATUS_REINITIALIZATION_NEEDED"; - case 0x80000288: return "STATUS_DEVICE_REQUIRES_CLEANING"; - case 0x80000289: return "STATUS_DEVICE_DOOR_OPEN"; - case 0xC000028A: return "STATUS_ENCRYPTION_FAILED"; - case 0xC000028B: return "STATUS_DECRYPTION_FAILED"; - case 0xC000028C: return "STATUS_RANGE_NOT_FOUND"; - case 0xC000028D: return "STATUS_NO_RECOVERY_POLICY"; - case 0xC000028E: return "STATUS_NO_EFS"; - case 0xC000028F: return "STATUS_WRONG_EFS"; - case 0xC0000290: return "STATUS_NO_USER_KEYS"; - case 0xC0000291: return "STATUS_FILE_NOT_ENCRYPTED"; - case 0xC0000292: return "STATUS_NOT_EXPORT_FORMAT"; - case 0xC0000293: return "STATUS_FILE_ENCRYPTED"; - case 0x40000294: return "STATUS_WAKE_SYSTEM"; - case 0xC0000295: return "STATUS_WMI_GUID_NOT_FOUND"; - case 0xC0000296: return "STATUS_WMI_INSTANCE_NOT_FOUND"; - case 0xC0000297: return "STATUS_WMI_ITEMID_NOT_FOUND"; - case 0xC0000298: return "STATUS_WMI_TRY_AGAIN"; - case 0xC0000299: return "STATUS_SHARED_POLICY"; - case 
0xC000029A: return "STATUS_POLICY_OBJECT_NOT_FOUND"; - case 0xC000029B: return "STATUS_POLICY_ONLY_IN_DS"; - case 0xC000029C: return "STATUS_VOLUME_NOT_UPGRADED"; - case 0xC000029D: return "STATUS_REMOTE_STORAGE_NOT_ACTIVE"; - case 0xC000029E: return "STATUS_REMOTE_STORAGE_MEDIA_ERROR"; - case 0xC000029F: return "STATUS_NO_TRACKING_SERVICE"; - case 0xC00002A0: return "STATUS_SERVER_SID_MISMATCH"; - case 0xC00002A1: return "STATUS_DS_NO_ATTRIBUTE_OR_VALUE"; - case 0xC00002A2: return "STATUS_DS_INVALID_ATTRIBUTE_SYNTAX"; - case 0xC00002A3: return "STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED"; - case 0xC00002A4: return "STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS"; - case 0xC00002A5: return "STATUS_DS_BUSY"; - case 0xC00002A6: return "STATUS_DS_UNAVAILABLE"; - case 0xC00002A7: return "STATUS_DS_NO_RIDS_ALLOCATED"; - case 0xC00002A8: return "STATUS_DS_NO_MORE_RIDS"; - case 0xC00002A9: return "STATUS_DS_INCORRECT_ROLE_OWNER"; - case 0xC00002AA: return "STATUS_DS_RIDMGR_INIT_ERROR"; - case 0xC00002AB: return "STATUS_DS_OBJ_CLASS_VIOLATION"; - case 0xC00002AC: return "STATUS_DS_CANT_ON_NON_LEAF"; - case 0xC00002AD: return "STATUS_DS_CANT_ON_RDN"; - case 0xC00002AE: return "STATUS_DS_CANT_MOD_OBJ_CLASS"; - case 0xC00002AF: return "STATUS_DS_CROSS_DOM_MOVE_FAILED"; - case 0xC00002B0: return "STATUS_DS_GC_NOT_AVAILABLE"; - case 0xC00002B1: return "STATUS_DIRECTORY_SERVICE_REQUIRED"; - case 0xC00002B2: return "STATUS_REPARSE_ATTRIBUTE_CONFLICT"; - case 0xC00002B3: return "STATUS_CANT_ENABLE_DENY_ONLY"; - case 0xC00002B4: return "STATUS_FLOAT_MULTIPLE_FAULTS"; - case 0xC00002B5: return "STATUS_FLOAT_MULTIPLE_TRAPS"; - case 0xC00002B6: return "STATUS_DEVICE_REMOVED"; - case 0xC00002B7: return "STATUS_JOURNAL_DELETE_IN_PROGRESS"; - case 0xC00002B8: return "STATUS_JOURNAL_NOT_ACTIVE"; - case 0xC00002B9: return "STATUS_NOINTERFACE"; - case 0xC00002C1: return "STATUS_DS_ADMIN_LIMIT_EXCEEDED"; - case 0xC00002C2: return "STATUS_DRIVER_FAILED_SLEEP"; - case 0xC00002C3: return 
"STATUS_MUTUAL_AUTHENTICATION_FAILED"; - case 0xC00002C4: return "STATUS_CORRUPT_SYSTEM_FILE"; - case 0xC00002C5: return "STATUS_DATATYPE_MISALIGNMENT_ERROR"; - case 0xC00002C6: return "STATUS_WMI_READ_ONLY"; - case 0xC00002C7: return "STATUS_WMI_SET_FAILURE"; - case 0xC00002C8: return "STATUS_COMMITMENT_MINIMUM"; - case 0xC00002C9: return "STATUS_REG_NAT_CONSUMPTION"; - case 0xC00002CA: return "STATUS_TRANSPORT_FULL"; - case 0xC00002CB: return "STATUS_DS_SAM_INIT_FAILURE"; - case 0xC00002CC: return "STATUS_ONLY_IF_CONNECTED"; - case 0xC00002CD: return "STATUS_DS_SENSITIVE_GROUP_VIOLATION"; - case 0xC00002CE: return "STATUS_PNP_RESTART_ENUMERATION"; - case 0xC00002CF: return "STATUS_JOURNAL_ENTRY_DELETED"; - case 0xC00002D0: return "STATUS_DS_CANT_MOD_PRIMARYGROUPID"; - case 0xC00002D1: return "STATUS_SYSTEM_IMAGE_BAD_SIGNATURE"; - case 0xC00002D2: return "STATUS_PNP_REBOOT_REQUIRED"; - case 0xC00002D3: return "STATUS_POWER_STATE_INVALID"; - case 0xC00002D4: return "STATUS_DS_INVALID_GROUP_TYPE"; - case 0xC00002D5: return "STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN"; - case 0xC00002D6: return "STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN"; - case 0xC00002D7: return "STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER"; - case 0xC00002D8: return "STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER"; - case 0xC00002D9: return "STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER"; - case 0xC00002DA: return "STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER"; - case 0xC00002DB: return "STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER"; - case 0xC00002DC: return "STATUS_DS_HAVE_PRIMARY_MEMBERS"; - case 0xC00002DD: return "STATUS_WMI_NOT_SUPPORTED"; - case 0xC00002DE: return "STATUS_INSUFFICIENT_POWER"; - case 0xC00002DF: return "STATUS_SAM_NEED_BOOTKEY_PASSWORD"; - case 0xC00002E0: return "STATUS_SAM_NEED_BOOTKEY_FLOPPY"; - case 0xC00002E1: return "STATUS_DS_CANT_START"; - case 0xC00002E2: return "STATUS_DS_INIT_FAILURE"; - case 0xC00002E3: return "STATUS_SAM_INIT_FAILURE"; - case 0xC00002E4: return 
"STATUS_DS_GC_REQUIRED"; - case 0xC00002E5: return "STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY"; - case 0xC00002E6: return "STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS"; - case 0xC00002E7: return "STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED"; - case 0xC00002E8: return "STATUS_MULTIPLE_FAULT_VIOLATION"; - case 0xC0000300: return "STATUS_NOT_SUPPORTED_ON_SBS"; - case 0xC0009898: return "STATUS_WOW_ASSERTION"; - case 0xC0010001: return "DBG_NO_STATE_CHANGE"; - case 0xC0010002: return "DBG_APP_NOT_IDLE"; - case 0xC0020001: return "RPC_NT_INVALID_STRING_BINDING"; - case 0xC0020002: return "RPC_NT_WRONG_KIND_OF_BINDING"; - case 0xC0020003: return "RPC_NT_INVALID_BINDING"; - case 0xC0020004: return "RPC_NT_PROTSEQ_NOT_SUPPORTED"; - case 0xC0020005: return "RPC_NT_INVALID_RPC_PROTSEQ"; - case 0xC0020006: return "RPC_NT_INVALID_STRING_UUID"; - case 0xC0020007: return "RPC_NT_INVALID_ENDPOINT_FORMAT"; - case 0xC0020008: return "RPC_NT_INVALID_NET_ADDR"; - case 0xC0020009: return "RPC_NT_NO_ENDPOINT_FOUND"; - case 0xC002000A: return "RPC_NT_INVALID_TIMEOUT"; - case 0xC002000B: return "RPC_NT_OBJECT_NOT_FOUND"; - case 0xC002000C: return "RPC_NT_ALREADY_REGISTERED"; - case 0xC002000D: return "RPC_NT_TYPE_ALREADY_REGISTERED"; - case 0xC002000E: return "RPC_NT_ALREADY_LISTENING"; - case 0xC002000F: return "RPC_NT_NO_PROTSEQS_REGISTERED"; - case 0xC0020010: return "RPC_NT_NOT_LISTENING"; - case 0xC0020011: return "RPC_NT_UNKNOWN_MGR_TYPE"; - case 0xC0020012: return "RPC_NT_UNKNOWN_IF"; - case 0xC0020013: return "RPC_NT_NO_BINDINGS"; - case 0xC0020014: return "RPC_NT_NO_PROTSEQS"; - case 0xC0020015: return "RPC_NT_CANT_CREATE_ENDPOINT"; - case 0xC0020016: return "RPC_NT_OUT_OF_RESOURCES"; - case 0xC0020017: return "RPC_NT_SERVER_UNAVAILABLE"; - case 0xC0020018: return "RPC_NT_SERVER_TOO_BUSY"; - case 0xC0020019: return "RPC_NT_INVALID_NETWORK_OPTIONS"; - case 0xC002001A: return "RPC_NT_NO_CALL_ACTIVE"; - case 0xC002001B: return "RPC_NT_CALL_FAILED"; - case 0xC002001C: return 
"RPC_NT_CALL_FAILED_DNE"; - case 0xC002001D: return "RPC_NT_PROTOCOL_ERROR"; - case 0xC002001F: return "RPC_NT_UNSUPPORTED_TRANS_SYN"; - case 0xC0020021: return "RPC_NT_UNSUPPORTED_TYPE"; - case 0xC0020022: return "RPC_NT_INVALID_TAG"; - case 0xC0020023: return "RPC_NT_INVALID_BOUND"; - case 0xC0020024: return "RPC_NT_NO_ENTRY_NAME"; - case 0xC0020025: return "RPC_NT_INVALID_NAME_SYNTAX"; - case 0xC0020026: return "RPC_NT_UNSUPPORTED_NAME_SYNTAX"; - case 0xC0020028: return "RPC_NT_UUID_NO_ADDRESS"; - case 0xC0020029: return "RPC_NT_DUPLICATE_ENDPOINT"; - case 0xC002002A: return "RPC_NT_UNKNOWN_AUTHN_TYPE"; - case 0xC002002B: return "RPC_NT_MAX_CALLS_TOO_SMALL"; - case 0xC002002C: return "RPC_NT_STRING_TOO_LONG"; - case 0xC002002D: return "RPC_NT_PROTSEQ_NOT_FOUND"; - case 0xC002002E: return "RPC_NT_PROCNUM_OUT_OF_RANGE"; - case 0xC002002F: return "RPC_NT_BINDING_HAS_NO_AUTH"; - case 0xC0020030: return "RPC_NT_UNKNOWN_AUTHN_SERVICE"; - case 0xC0020031: return "RPC_NT_UNKNOWN_AUTHN_LEVEL"; - case 0xC0020032: return "RPC_NT_INVALID_AUTH_IDENTITY"; - case 0xC0020033: return "RPC_NT_UNKNOWN_AUTHZ_SERVICE"; - case 0xC0020034: return "EPT_NT_INVALID_ENTRY"; - case 0xC0020035: return "EPT_NT_CANT_PERFORM_OP"; - case 0xC0020036: return "EPT_NT_NOT_REGISTERED"; - case 0xC0020037: return "RPC_NT_NOTHING_TO_EXPORT"; - case 0xC0020038: return "RPC_NT_INCOMPLETE_NAME"; - case 0xC0020039: return "RPC_NT_INVALID_VERS_OPTION"; - case 0xC002003A: return "RPC_NT_NO_MORE_MEMBERS"; - case 0xC002003B: return "RPC_NT_NOT_ALL_OBJS_UNEXPORTED"; - case 0xC002003C: return "RPC_NT_INTERFACE_NOT_FOUND"; - case 0xC002003D: return "RPC_NT_ENTRY_ALREADY_EXISTS"; - case 0xC002003E: return "RPC_NT_ENTRY_NOT_FOUND"; - case 0xC002003F: return "RPC_NT_NAME_SERVICE_UNAVAILABLE"; - case 0xC0020040: return "RPC_NT_INVALID_NAF_ID"; - case 0xC0020041: return "RPC_NT_CANNOT_SUPPORT"; - case 0xC0020042: return "RPC_NT_NO_CONTEXT_AVAILABLE"; - case 0xC0020043: return "RPC_NT_INTERNAL_ERROR"; - case 
0xC0020044: return "RPC_NT_ZERO_DIVIDE"; - case 0xC0020045: return "RPC_NT_ADDRESS_ERROR"; - case 0xC0020046: return "RPC_NT_FP_DIV_ZERO"; - case 0xC0020047: return "RPC_NT_FP_UNDERFLOW"; - case 0xC0020048: return "RPC_NT_FP_OVERFLOW"; - case 0xC0030001: return "RPC_NT_NO_MORE_ENTRIES"; - case 0xC0030002: return "RPC_NT_SS_CHAR_TRANS_OPEN_FAIL"; - case 0xC0030003: return "RPC_NT_SS_CHAR_TRANS_SHORT_FILE"; - case 0xC0030004: return "RPC_NT_SS_IN_NULL_CONTEXT"; - case 0xC0030005: return "RPC_NT_SS_CONTEXT_MISMATCH"; - case 0xC0030006: return "RPC_NT_SS_CONTEXT_DAMAGED"; - case 0xC0030007: return "RPC_NT_SS_HANDLES_MISMATCH"; - case 0xC0030008: return "RPC_NT_SS_CANNOT_GET_CALL_HANDLE"; - case 0xC0030009: return "RPC_NT_NULL_REF_POINTER"; - case 0xC003000A: return "RPC_NT_ENUM_VALUE_OUT_OF_RANGE"; - case 0xC003000B: return "RPC_NT_BYTE_COUNT_TOO_SMALL"; - case 0xC003000C: return "RPC_NT_BAD_STUB_DATA"; - case 0xC0020049: return "RPC_NT_CALL_IN_PROGRESS"; - case 0xC002004A: return "RPC_NT_NO_MORE_BINDINGS"; - case 0xC002004B: return "RPC_NT_GROUP_MEMBER_NOT_FOUND"; - case 0xC002004C: return "EPT_NT_CANT_CREATE"; - case 0xC002004D: return "RPC_NT_INVALID_OBJECT"; - case 0xC002004F: return "RPC_NT_NO_INTERFACES"; - case 0xC0020050: return "RPC_NT_CALL_CANCELLED"; - case 0xC0020051: return "RPC_NT_BINDING_INCOMPLETE"; - case 0xC0020052: return "RPC_NT_COMM_FAILURE"; - case 0xC0020053: return "RPC_NT_UNSUPPORTED_AUTHN_LEVEL"; - case 0xC0020054: return "RPC_NT_NO_PRINC_NAME"; - case 0xC0020055: return "RPC_NT_NOT_RPC_ERROR"; - case 0x40020056: return "RPC_NT_UUID_LOCAL_ONLY"; - case 0xC0020057: return "RPC_NT_SEC_PKG_ERROR"; - case 0xC0020058: return "RPC_NT_NOT_CANCELLED"; - case 0xC0030059: return "RPC_NT_INVALID_ES_ACTION"; - case 0xC003005A: return "RPC_NT_WRONG_ES_VERSION"; - case 0xC003005B: return "RPC_NT_WRONG_STUB_VERSION"; - case 0xC003005C: return "RPC_NT_INVALID_PIPE_OBJECT"; - case 0xC003005D: return "RPC_NT_INVALID_PIPE_OPERATION"; - case 0xC003005E: return 
"RPC_NT_WRONG_PIPE_VERSION"; - case 0xC003005F: return "RPC_NT_PIPE_CLOSED"; - case 0xC0030060: return "RPC_NT_PIPE_DISCIPLINE_ERROR"; - case 0xC0030061: return "RPC_NT_PIPE_EMPTY"; - case 0xC0020062: return "RPC_NT_INVALID_ASYNC_HANDLE"; - case 0xC0020063: return "RPC_NT_INVALID_ASYNC_CALL"; - case 0x400200AF: return "RPC_NT_SEND_INCOMPLETE"; - case 0xC0140001: return "STATUS_ACPI_INVALID_OPCODE"; - case 0xC0140002: return "STATUS_ACPI_STACK_OVERFLOW"; - case 0xC0140003: return "STATUS_ACPI_ASSERT_FAILED"; - case 0xC0140004: return "STATUS_ACPI_INVALID_INDEX"; - case 0xC0140005: return "STATUS_ACPI_INVALID_ARGUMENT"; - case 0xC0140006: return "STATUS_ACPI_FATAL"; - case 0xC0140007: return "STATUS_ACPI_INVALID_SUPERNAME"; - case 0xC0140008: return "STATUS_ACPI_INVALID_ARGTYPE"; - case 0xC0140009: return "STATUS_ACPI_INVALID_OBJTYPE"; - case 0xC014000A: return "STATUS_ACPI_INVALID_TARGETTYPE"; - case 0xC014000B: return "STATUS_ACPI_INCORRECT_ARGUMENT_COUNT"; - case 0xC014000C: return "STATUS_ACPI_ADDRESS_NOT_MAPPED"; - case 0xC014000D: return "STATUS_ACPI_INVALID_EVENTTYPE"; - case 0xC014000E: return "STATUS_ACPI_HANDLER_COLLISION"; - case 0xC014000F: return "STATUS_ACPI_INVALID_DATA"; - case 0xC0140010: return "STATUS_ACPI_INVALID_REGION"; - case 0xC0140011: return "STATUS_ACPI_INVALID_ACCESS_SIZE"; - case 0xC0140012: return "STATUS_ACPI_ACQUIRE_GLOBAL_LOCK"; - case 0xC0140013: return "STATUS_ACPI_ALREADY_INITIALIZED"; - case 0xC0140014: return "STATUS_ACPI_NOT_INITIALIZED"; - case 0xC0140015: return "STATUS_ACPI_INVALID_MUTEX_LEVEL"; - case 0xC0140016: return "STATUS_ACPI_MUTEX_NOT_OWNED"; - case 0xC0140017: return "STATUS_ACPI_MUTEX_NOT_OWNER"; - case 0xC0140018: return "STATUS_ACPI_RS_ACCESS"; - case 0xC0140019: return "STATUS_ACPI_INVALID_TABLE"; - case 0xC0140020: return "STATUS_ACPI_REG_HANDLER_FAILED"; - case 0xC0140021: return "STATUS_ACPI_POWER_REQUEST_FAILED"; - case 0xC00A0001: return "STATUS_CTX_WINSTATION_NAME_INVALID"; - case 0xC00A0002: return 
"STATUS_CTX_INVALID_PD"; - case 0xC00A0003: return "STATUS_CTX_PD_NOT_FOUND"; - case 0x400A0004: return "STATUS_CTX_CDM_CONNECT"; - case 0x400A0005: return "STATUS_CTX_CDM_DISCONNECT"; - case 0xC00A0006: return "STATUS_CTX_CLOSE_PENDING"; - case 0xC00A0007: return "STATUS_CTX_NO_OUTBUF"; - case 0xC00A0008: return "STATUS_CTX_MODEM_INF_NOT_FOUND"; - case 0xC00A0009: return "STATUS_CTX_INVALID_MODEMNAME"; - case 0xC00A000A: return "STATUS_CTX_RESPONSE_ERROR"; - case 0xC00A000B: return "STATUS_CTX_MODEM_RESPONSE_TIMEOUT"; - case 0xC00A000C: return "STATUS_CTX_MODEM_RESPONSE_NO_CARRIER"; - case 0xC00A000D: return "STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE"; - case 0xC00A000E: return "STATUS_CTX_MODEM_RESPONSE_BUSY"; - case 0xC00A000F: return "STATUS_CTX_MODEM_RESPONSE_VOICE"; - case 0xC00A0010: return "STATUS_CTX_TD_ERROR"; - case 0xC00A0012: return "STATUS_CTX_LICENSE_CLIENT_INVALID"; - case 0xC00A0013: return "STATUS_CTX_LICENSE_NOT_AVAILABLE"; - case 0xC00A0014: return "STATUS_CTX_LICENSE_EXPIRED"; - case 0xC00A0015: return "STATUS_CTX_WINSTATION_NOT_FOUND"; - case 0xC00A0016: return "STATUS_CTX_WINSTATION_NAME_COLLISION"; - case 0xC00A0017: return "STATUS_CTX_WINSTATION_BUSY"; - case 0xC00A0018: return "STATUS_CTX_BAD_VIDEO_MODE"; - case 0xC00A0022: return "STATUS_CTX_GRAPHICS_INVALID"; - case 0xC00A0024: return "STATUS_CTX_NOT_CONSOLE"; - case 0xC00A0026: return "STATUS_CTX_CLIENT_QUERY_TIMEOUT"; - case 0xC00A0027: return "STATUS_CTX_CONSOLE_DISCONNECT"; - case 0xC00A0028: return "STATUS_CTX_CONSOLE_CONNECT"; - case 0xC00A002A: return "STATUS_CTX_SHADOW_DENIED"; - case 0xC00A002B: return "STATUS_CTX_WINSTATION_ACCESS_DENIED"; - case 0xC00A002E: return "STATUS_CTX_INVALID_WD"; - case 0xC00A002F: return "STATUS_CTX_WD_NOT_FOUND"; - case 0xC00A0030: return "STATUS_CTX_SHADOW_INVALID"; - case 0xC00A0031: return "STATUS_CTX_SHADOW_DISABLED"; - case 0xC00A0032: return "STATUS_RDP_PROTOCOL_ERROR"; - case 0xC00A0033: return "STATUS_CTX_CLIENT_LICENSE_NOT_SET"; - case 
0xC00A0034: return "STATUS_CTX_CLIENT_LICENSE_IN_USE"; - case 0xC0040035: return "STATUS_PNP_BAD_MPS_TABLE"; - case 0xC0040036: return "STATUS_PNP_TRANSLATION_FAILED"; - case 0xC0040037: return "STATUS_PNP_IRQ_TRANSLATION_FAILED"; - default: return "STATUS_UNKNOWN"; - } -} - - -/* - * KsPrintf - * This function is variable-argument, level-sensitive debug print routine. - * If the specified debug level for the print statement is lower or equal - * to the current debug level, the message will be printed. - * - * Arguments: - * DebugPrintLevel - Specifies at which debugging level the string should - * be printed - * DebugMessage - Variable argument ascii c string - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -VOID -KsPrintf( - LONG DebugPrintLevel, - PCHAR DebugMessage, - ... - ) -{ - va_list ap; - - va_start(ap, DebugMessage); - - if (DebugPrintLevel <= KsDebugLevel) - { - CHAR buffer[0x200]; - - vsprintf(buffer, DebugMessage, ap); - - KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer)); - } - - va_end(ap); - -} // KsPrint() - -#endif diff --git a/lnet/libcfs/winnt/winnt-fs.c b/lnet/libcfs/winnt/winnt-fs.c deleted file mode 100644 index 128781bffb2cb4c9726c2100f0cb41f6dce704f6..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-fs.c +++ /dev/null @@ -1,541 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - -const CHAR *dos_file_prefix = "\\??\\"; - -/* - * cfs_filp_open - * To open or create a file in kernel mode - * - * Arguments: - * name: name of the file to be opened or created, no dos path prefix - * flags: open/creation attribute options - * mode: access mode/permission to open or create - * err: error code - * - * Return Value: - * the pointer to the cfs_file_t or NULL if it fails - * - * Notes: - * N/A - */ - -cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) -{ - cfs_file_t * fp = NULL; - - NTSTATUS Status; - - OBJECT_ATTRIBUTES ObjectAttributes; - HANDLE FileHandle; - IO_STATUS_BLOCK IoStatus; - ACCESS_MASK DesiredAccess; - ULONG CreateDisposition; - ULONG ShareAccess; - ULONG CreateOptions; - - USHORT NameLength = 0; - USHORT PrefixLength = 0; - - UNICODE_STRING UnicodeName; - PWCHAR UnicodeString = NULL; - - ANSI_STRING AnsiName; - PUCHAR AnsiString = NULL; - - /* Analyze the flags settings */ - - if (cfs_is_flag_set(flags, O_WRONLY)) { - DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = 0; - } else if (cfs_is_flag_set(flags, O_RDWR)) { - DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; - } else { - DesiredAccess = (GENERIC_READ | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ; - } - - if (cfs_is_flag_set(flags, O_CREAT)) { - if (cfs_is_flag_set(flags, O_EXCL)) { - CreateDisposition = FILE_CREATE; - } else { - CreateDisposition = FILE_OPEN_IF; - } - } else { - CreateDisposition = FILE_OPEN; - } - - if (cfs_is_flag_set(flags, O_TRUNC)) { - if (cfs_is_flag_set(flags, O_EXCL)) { - CreateDisposition = FILE_OVERWRITE; - } else { - 
CreateDisposition = FILE_OVERWRITE_IF; - } - } - - CreateOptions = 0; - - if (cfs_is_flag_set(flags, O_DIRECTORY)) { - cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); - } - - if (cfs_is_flag_set(flags, O_SYNC)) { - cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); - } - - if (cfs_is_flag_set(flags, O_DIRECT)) { - cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); - } - - /* Initialize the unicode path name for the specified file */ - - NameLength = (USHORT)strlen(name); - - if (name[0] != '\\') { - PrefixLength = (USHORT)strlen(dos_file_prefix); - } - - AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1), - CFS_ALLOC_ZERO); - if (NULL == AnsiString) { - if (err) *err = -ENOMEM; - return NULL; - } - - UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1), - CFS_ALLOC_ZERO); - - if (NULL == UnicodeString) { - if (err) *err = -ENOMEM; - cfs_free(AnsiString); - return NULL; - } - - if (PrefixLength) { - RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength); - } - - RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength); - NameLength += PrefixLength; - - AnsiName.MaximumLength = NameLength + 1; - AnsiName.Length = NameLength; - AnsiName.Buffer = AnsiString; - - UnicodeName.MaximumLength = (NameLength + 1) * sizeof(WCHAR); - UnicodeName.Length = 0; - UnicodeName.Buffer = (PWSTR)UnicodeString; - - RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE); - - /* Setup the object attributes structure for the file. */ - - InitializeObjectAttributes( - &ObjectAttributes, - &UnicodeName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL ); - - /* Now to open or create the file now */ - - Status = ZwCreateFile( - &FileHandle, - DesiredAccess, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - ShareAccess, - CreateDisposition, - CreateOptions, - NULL, - 0 ); - - /* Check the returned status of IoStatus... 
*/ - - if (!NT_SUCCESS(IoStatus.Status)) { - *err = cfs_error_code(IoStatus.Status); - cfs_free(UnicodeString); - cfs_free(AnsiString); - return NULL; - } - - /* Allocate the cfs_file_t: libcfs file object */ - - fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO); - - if (NULL == fp) { - Status = ZwClose(FileHandle); - ASSERT(NT_SUCCESS(Status)); - *err = -ENOMEM; - cfs_free(UnicodeString); - cfs_free(AnsiString); - return NULL; - } - - fp->f_handle = FileHandle; - strcpy(fp->f_name, name); - fp->f_flags = flags; - fp->f_mode = (mode_t)mode; - fp->f_count = 1; - *err = 0; - - /* free the memory of temporary name strings */ - cfs_free(UnicodeString); - cfs_free(AnsiString); - - return fp; -} - - -/* - * cfs_filp_close - * To close the opened file and release the filp structure - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * ZERO: on success - * Non-Zero: on failure - * - * Notes: - * N/A - */ - -int cfs_filp_close(cfs_file_t *fp) -{ - NTSTATUS Status; - - ASSERT(fp != NULL); - ASSERT(fp->f_handle != NULL); - - /* release the file handle */ - Status = ZwClose(fp->f_handle); - ASSERT(NT_SUCCESS(Status)); - - /* free the file flip structure */ - cfs_free(fp); - return 0; -} - - -/* - * cfs_filp_read - * To read data from the opened file - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * buf: pointer to the buffer to contain the data - * nbytes: size in bytes to be read from the file - * pos: offset in file where reading starts, if pos - * NULL, then read from current file offset - * - * Return Value: - * Actual size read into the buffer in success case - * Error code in failure case - * - * Notes: - * N/A - */ - -int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) -{ - LARGE_INTEGER address; - NTSTATUS Status; - IO_STATUS_BLOCK IoStatus; - - int rc = 0; - - /* Read data from the file into the specified buffer */ - - if (pos != NULL) { - address.QuadPart = *pos; - } else { - 
address.QuadPart = fp->f_pos; - } - - Status = ZwReadFile( fp->f_handle, - 0, - NULL, - NULL, - &IoStatus, - buf, - nbytes, - &address, - NULL ); - - if (!NT_SUCCESS(IoStatus.Status)) { - rc = cfs_error_code(IoStatus.Status); - } else { - rc = (int)IoStatus.Information; - fp->f_pos = address.QuadPart + rc; - - if (pos != NULL) { - *pos = fp->f_pos; - } - } - - return rc; -} - - -/* - * cfs_filp_wrtie - * To write specified data to the opened file - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * buf: pointer to the buffer containing the data - * nbytes: size in bytes to be written to the file - * pos: offset in file where writing starts, if pos - * NULL, then write to current file offset - * - * Return Value: - * Actual size written into the buffer in success case - * Error code in failure case - * - * Notes: - * N/A - */ - -int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) -{ - LARGE_INTEGER address; - NTSTATUS Status; - IO_STATUS_BLOCK IoStatus; - int rc = 0; - - /* Write user specified data into the file */ - - if (pos != NULL) { - address.QuadPart = *pos; - } else { - address.QuadPart = fp->f_pos; - } - - Status = ZwWriteFile( fp->f_handle, - 0, - NULL, - NULL, - &IoStatus, - buf, - nbytes, - &address, - NULL ); - - if (!NT_SUCCESS(Status)) { - rc = cfs_error_code(Status); - } else { - rc = (int)IoStatus.Information; - fp->f_pos = address.QuadPart + rc; - - if (pos != NULL) { - *pos = fp->f_pos; - } - } - - return rc; -} - - -NTSTATUS -CompletionRoutine( - PDEVICE_OBJECT DeviceObject, - PIRP Irp, - PVOID Context) -{ - /* copy the IoStatus result */ - *Irp->UserIosb = Irp->IoStatus; - - /* singal the event we set */ - KeSetEvent(Irp->UserEvent, 0, FALSE); - - /* free the Irp we allocated */ - IoFreeIrp(Irp); - - return STATUS_MORE_PROCESSING_REQUIRED; -} - - -/* - * cfs_filp_fsync - * To sync the dirty data of the file to disk - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * 
Zero: in success case - * Error code: in failure case - * - * Notes: - * Nt kernel doesn't export such a routine to flush a file, - * we must allocate our own Irp and issue it to the file - * system driver. - */ - -int cfs_filp_fsync(cfs_file_t *fp) -{ - - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - - NTSTATUS Status; - PIRP Irp; - KEVENT Event; - IO_STATUS_BLOCK IoSb; - PIO_STACK_LOCATION IrpSp; - - /* get the FileObject and the DeviceObject */ - - Status = ObReferenceObjectByHandle( - fp->f_handle, - FILE_WRITE_DATA, - NULL, - KernelMode, - (PVOID*)&FileObject, - NULL ); - - if (!NT_SUCCESS(Status)) { - return cfs_error_code(Status); - } - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - /* allocate a new Irp */ - - Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); - - if (!Irp) { - - ObDereferenceObject(FileObject); - return -ENOMEM; - } - - /* intialize the event */ - KeInitializeEvent(&Event, SynchronizationEvent, FALSE); - - /* setup the Irp */ - Irp->UserEvent = &Event; - Irp->UserIosb = &IoSb; - Irp->RequestorMode = KernelMode; - - Irp->Tail.Overlay.Thread = PsGetCurrentThread(); - Irp->Tail.Overlay.OriginalFileObject = FileObject; - - /* setup the Irp stack location */ - IrpSp = IoGetNextIrpStackLocation(Irp); - - IrpSp->MajorFunction = IRP_MJ_FLUSH_BUFFERS; - IrpSp->DeviceObject = DeviceObject; - IrpSp->FileObject = FileObject; - - IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE); - - - /* issue the Irp to the underlying file system driver */ - IoCallDriver(DeviceObject, Irp); - - /* wait until it is finished */ - KeWaitForSingleObject(&Event, Executive, KernelMode, TRUE, 0); - - /* cleanup our reference on it */ - ObDereferenceObject(FileObject); - - Status = IoSb.Status; - - return cfs_error_code(Status); -} - -/* - * cfs_get_file - * To increase the reference of the file object - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * Zero: in success case - * Non-Zero: in 
failure case - * - * Notes: - * N/A - */ - -int cfs_get_file(cfs_file_t *fp) -{ - InterlockedIncrement(&(fp->f_count)); - return 0; -} - - -/* - * cfs_put_file - * To decrease the reference of the file object - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * Zero: in success case - * Non-Zero: in failure case - * - * Notes: - * N/A - */ - -int cfs_put_file(cfs_file_t *fp) -{ - if (InterlockedDecrement(&(fp->f_count)) == 0) { - cfs_filp_close(fp); - } - - return 0; -} - - -/* - * cfs_file_count - * To query the reference count of the file object - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * the reference count of the file object - * - * Notes: - * N/A - */ - -int cfs_file_count(cfs_file_t *fp) -{ - return (int)(fp->f_count); -} diff --git a/lnet/libcfs/winnt/winnt-lock.c b/lnet/libcfs/winnt/winnt-lock.c deleted file mode 100644 index 12dbc67ab48b869095f59167db5317f1454b13ea..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-lock.c +++ /dev/null @@ -1,353 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - - -#if _X86_ - -void __declspec (naked) FASTCALL -atomic_add( - int i, - atomic_t *v - ) -{ - // ECX = i - // EDX = v ; [EDX][0] = v->counter - - __asm { - lock add dword ptr [edx][0], ecx - ret - } -} - -void __declspec (naked) FASTCALL -atomic_sub( - int i, - atomic_t *v - ) -{ - // ECX = i - // EDX = v ; [EDX][0] = v->counter - - __asm { - lock sub dword ptr [edx][0], ecx - ret - } -} - -void __declspec (naked) FASTCALL -atomic_inc( - atomic_t *v - ) -{ - //InterlockedIncrement((PULONG)(&((v)->counter))); - - //` ECX = v ; [ECX][0] = v->counter - - __asm { - lock inc dword ptr [ecx][0] - ret - } -} - -void __declspec (naked) FASTCALL -atomic_dec( - atomic_t *v - ) -{ - // ECX = v ; [ECX][0] = v->counter - - __asm { - lock dec dword ptr [ecx][0] - ret - } -} - -int __declspec (naked) FASTCALL -atomic_sub_and_test( - int i, - atomic_t *v - ) -{ - - // ECX = i - // EDX = v ; [EDX][0] = v->counter - - __asm { - xor eax, eax - lock sub dword ptr [edx][0], ecx - sete al - ret - } -} - -int __declspec (naked) FASTCALL -atomic_inc_and_test( - atomic_t *v - ) -{ - // ECX = v ; [ECX][0] = v->counter - - __asm { - xor eax, eax - lock inc dword ptr [ecx][0] - sete al - ret - } -} - -int __declspec (naked) FASTCALL -atomic_dec_and_test( - atomic_t *v - ) -{ - // ECX = v ; [ECX][0] = v->counter - - __asm { - xor eax, eax - lock dec dword ptr [ecx][0] - sete al - ret - } -} - -#else - -void FASTCALL -atomic_add( - int i, - atomic_t *v - ) -{ - InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (i)); -} - -void FASTCALL -atomic_sub( - int i, - atomic_t *v - ) -{ - InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (-1*i)); -} - -void FASTCALL -atomic_inc( - atomic_t *v - ) -{ - InterlockedIncrement((PULONG)(&((v)->counter))); -} - -void FASTCALL -atomic_dec( - atomic_t *v - ) -{ - InterlockedDecrement((PULONG)(&((v)->counter))); -} - -int FASTCALL -atomic_sub_and_test( - int i, - atomic_t 
*v - ) -{ - int counter, result; - - do { - - counter = v->counter; - result = counter - i; - - } while ( InterlockedCompareExchange( - &(v->counter), - result, - counter) != counter); - - return (result == 0); -} - -int FASTCALL -atomic_inc_and_test( - atomic_t *v - ) -{ - int counter, result; - - do { - - counter = v->counter; - result = counter + 1; - - } while ( InterlockedCompareExchange( - &(v->counter), - result, - counter) != counter); - - return (result == 0); -} - -int FASTCALL -atomic_dec_and_test( - atomic_t *v - ) -{ - int counter, result; - - do { - - counter = v->counter; - result = counter + 1; - - } while ( InterlockedCompareExchange( - &(v->counter), - result, - counter) != counter); - - return (result == 0); -} - -#endif - - -/* - * rw spinlock - */ - - -void -rwlock_init(rwlock_t * rwlock) -{ - spin_lock_init(&rwlock->guard); - rwlock->count = 0; -} - -void -rwlock_fini(rwlock_t * rwlock) -{ -} - -void -read_lock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - slot->irql = KeRaiseIrqlToDpcLevel(); - - while (TRUE) { - spin_lock(&rwlock->guard); - if (rwlock->count >= 0) - break; - spin_unlock(&rwlock->guard); - } - - rwlock->count++; - spin_unlock(&rwlock->guard); -} - -void -read_unlock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - spin_lock(&rwlock->guard); - ASSERT(rwlock->count > 0); - rwlock->count--; - if (rwlock < 0) { - cfs_enter_debugger(); - } - spin_unlock(&rwlock->guard); - - KeLowerIrql(slot->irql); -} - -void -write_lock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - 
- if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - slot->irql = KeRaiseIrqlToDpcLevel(); - - while (TRUE) { - spin_lock(&rwlock->guard); - if (rwlock->count == 0) - break; - spin_unlock(&rwlock->guard); - } - - rwlock->count = -1; - spin_unlock(&rwlock->guard); -} - -void -write_unlock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - spin_lock(&rwlock->guard); - ASSERT(rwlock->count == -1); - rwlock->count = 0; - spin_unlock(&rwlock->guard); - - KeLowerIrql(slot->irql); -} diff --git a/lnet/libcfs/winnt/winnt-lwt.c b/lnet/libcfs/winnt/winnt-lwt.c deleted file mode 100644 index 272cbcf412da8065e0d35e6a7cdebe87b44c41ab..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-lwt.c +++ /dev/null @@ -1,20 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - -# define DEBUG_SUBSYSTEM S_LNET - diff --git a/lnet/libcfs/winnt/winnt-mem.c b/lnet/libcfs/winnt/winnt-mem.c deleted file mode 100644 index 6b66a95c6bbbce0386aa03a5f8a352f15b604915..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-mem.c +++ /dev/null @@ -1,332 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - - -cfs_mem_cache_t *cfs_page_t_slab = NULL; -cfs_mem_cache_t *cfs_page_p_slab = NULL; - -/* - * cfs_alloc_page - * To allocate the cfs_page_t and also 1 page of memory - * - * Arguments: - * flags: the allocation options - * - * Return Value: - * pointer to the cfs_page_t strcture in success or - * NULL in failure case - * - * Notes: - * N/A - */ - -cfs_page_t * cfs_alloc_page(int flags) -{ - cfs_page_t *pg; - pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0); - - if (NULL == pg) { - cfs_enter_debugger(); - return NULL; - } - - memset(pg, 0, sizeof(cfs_page_t)); - pg->addr = cfs_mem_cache_alloc(cfs_page_p_slab, 0); - atomic_set(&pg->count, 1); - - if (pg->addr) { - if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) { - memset(pg->addr, 0, CFS_PAGE_SIZE); - } - } else { - cfs_enter_debugger(); - cfs_mem_cache_free(cfs_page_t_slab, pg); - pg = NULL; - } - - return pg; -} - -/* - * cfs_free_page - * To free the cfs_page_t including the page - * - * Arguments: - * pg: pointer to the cfs_page_t strcture - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ -void cfs_free_page(cfs_page_t *pg) -{ - ASSERT(pg != NULL); - ASSERT(pg->addr != NULL); - ASSERT(atomic_read(&pg->count) <= 1); - - cfs_mem_cache_free(cfs_page_p_slab, pg->addr); - cfs_mem_cache_free(cfs_page_t_slab, pg); -} - - -/* - * cfs_alloc - * To allocate memory from system pool - * - * Arguments: - * nr_bytes: length in bytes of the requested buffer - * flags: flags indiction - * - * Return Value: - * NULL: if there's no enough memory space in system - * the address of the allocated memory in success. - * - * Notes: - * This operation can be treated as atomic. 
- */ - -void * -cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - void *ptr; - - /* Ignore the flags: always allcoate from NonPagedPool */ - - ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs'); - - if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) { - memset(ptr, 0, nr_bytes); - } - - if (!ptr) { - cfs_enter_debugger(); - } - - return ptr; -} - -/* - * cfs_free - * To free the sepcified memory to system pool - * - * Arguments: - * addr: pointer to the buffer to be freed - * - * Return Value: - * N/A - * - * Notes: - * This operation can be treated as atomic. - */ - -void -cfs_free(void *addr) -{ - ExFreePool(addr); -} - -/* - * cfs_alloc_large - * To allocate large block of memory from system pool - * - * Arguments: - * nr_bytes: length in bytes of the requested buffer - * - * Return Value: - * NULL: if there's no enough memory space in system - * the address of the allocated memory in success. - * - * Notes: - * N/A - */ - -void * -cfs_alloc_large(size_t nr_bytes) -{ - return cfs_alloc(nr_bytes, 0); -} - -/* - * cfs_free_large - * To free the sepcified memory to system pool - * - * Arguments: - * addr: pointer to the buffer to be freed - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_free_large(void *addr) -{ - cfs_free(addr); -} - - -/* - * cfs_mem_cache_create - * To create a SLAB cache - * - * Arguments: - * name: name string of the SLAB cache to be created - * size: size in bytes of SLAB entry buffer - * offset: offset in the page - * flags: SLAB creation flags -* - * Return Value: - * The poitner of cfs_memory_cache structure in success. - * NULL pointer in failure case. - * - * Notes: - * 1, offset won't be used here. - * 2, it could be better to induce a lock to protect the access of the - * SLAB structure on SMP if there's not outside lock protection. - * 3, parameters C/D are removed. 
- */ - -cfs_mem_cache_t * -cfs_mem_cache_create( - const char * name, - size_t size, - size_t offset, - unsigned long flags - ) -{ - cfs_mem_cache_t * kmc = NULL; - - /* The name of the SLAB could not exceed 20 chars */ - - if (name && strlen(name) >= 20) { - goto errorout; - } - - /* Allocate and initialize the SLAB strcture */ - - kmc = cfs_alloc (sizeof(cfs_mem_cache_t), 0); - - if (NULL == kmc) { - goto errorout; - } - - memset(kmc, 0, sizeof(cfs_mem_cache_t)); - - kmc->flags = flags; - - if (name) { - strcpy(&kmc->name[0], name); - } - - /* Initialize the corresponding LookAside list */ - - ExInitializeNPagedLookasideList( - &(kmc->npll), - NULL, - NULL, - 0, - size, - 'pnmk', - 0); - -errorout: - - return kmc; -} - -/* - * cfs_mem_cache_destroy - * To destroy the unused SLAB cache - * - * Arguments: - * kmc: the SLAB cache to be destroied. - * - * Return Value: - * 0: in success case. - * 1: in failure case. - * - * Notes: - * N/A - */ - -int cfs_mem_cache_destroy (cfs_mem_cache_t * kmc) -{ - ASSERT(kmc != NULL); - - ExDeleteNPagedLookasideList(&(kmc->npll)); - - cfs_free(kmc); - - return 0; -} - -/* - * cfs_mem_cache_alloc - * To allocate an object (LookAside entry) from the SLAB - * - * Arguments: - * kmc: the SLAB cache to be allocated from. - * flags: flags for allocation options - * - * Return Value: - * object buffer address: in success case. - * NULL: in failure case. - * - * Notes: - * N/A - */ - -void *cfs_mem_cache_alloc(cfs_mem_cache_t * kmc, int flags) -{ - void *buf = NULL; - - buf = ExAllocateFromNPagedLookasideList(&(kmc->npll)); - - return buf; -} - -/* - * cfs_mem_cache_free - * To free an object (LookAside entry) to the SLAB cache - * - * Arguments: - * kmc: the SLAB cache to be freed to. - * buf: the pointer to the object to be freed. 
- * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf) -{ - ExFreeToNPagedLookasideList(&(kmc->npll), buf); -} diff --git a/lnet/libcfs/winnt/winnt-module.c b/lnet/libcfs/winnt/winnt-module.c deleted file mode 100644 index 2b6b00888e06a4cbb6cd400610724d8e59854718..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-module.c +++ /dev/null @@ -1,160 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - - -#define DEBUG_SUBSYSTEM S_LIBCFS - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#define LIBCFS_MINOR 240 - -int libcfs_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct libcfs_ioctl_hdr *hdr; - struct libcfs_ioctl_data *data; - int err; - ENTRY; - - hdr = (struct libcfs_ioctl_hdr *)buf; - data = (struct libcfs_ioctl_data *)buf; - - err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); - if (err) - RETURN(err); - - if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { - CERROR(("LIBCFS: version mismatch kernel vs application\n")); - RETURN(-EINVAL); - } - - if (hdr->ioc_len + buf >= end) { - CERROR(("LIBCFS: user buffer exceeds kernel buffer\n")); - RETURN(-EINVAL); - } - - if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { - CERROR(("LIBCFS: user buffer too small for ioctl\n")); - RETURN(-EINVAL); - } - - err = copy_from_user(buf, (void *)arg, hdr->ioc_len); - if (err) - RETURN(err); - - if (libcfs_ioctl_is_invalid(data)) { - CERROR(("LIBCFS: ioctl not correctly formatted\n")); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); - - RETURN(0); -} - -extern struct cfs_psdev_ops libcfs_psdev_ops; - -static int -libcfs_psdev_open(cfs_file_t * file) -{ - struct libcfs_device_userstate **pdu = NULL; - int rc = 0; - - pdu = (struct libcfs_device_userstate **)&file->private_data; - if (libcfs_psdev_ops.p_open != NULL) - rc = libcfs_psdev_ops.p_open(0, (void *)pdu); - else - return (-EPERM); - return rc; -} - -/* called when closing /dev/device */ -static int -libcfs_psdev_release(cfs_file_t * file) -{ - struct libcfss_device_userstate *pdu; - int rc = 0; - - pdu = file->private_data; - if (libcfs_psdev_ops.p_close != NULL) - rc = libcfs_psdev_ops.p_close(0, (void *)pdu); - else - rc = -EPERM; - return rc; -} - -static int -libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg) -{ - struct 
cfs_psdev_file pfile; - int rc = 0; - - if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || - _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || - _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { - CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd))); - return (-EINVAL); - } - - /* Handle platform-dependent IOC requests */ - switch (cmd) { - case IOC_LIBCFS_PANIC: - if (!capable (CAP_SYS_BOOT)) - return (-EPERM); - CERROR(("debugctl-invoked panic")); - KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL); - - return (0); - case IOC_LIBCFS_MEMHOG: - - if (!capable (CAP_SYS_ADMIN)) - return -EPERM; - break; - } - - pfile.off = 0; - pfile.private_data = file->private_data; - if (libcfs_psdev_ops.p_ioctl != NULL) - rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); - else - rc = -EPERM; - return (rc); -} - -static struct file_operations libcfs_fops = { - /* lseek: */ NULL, - /* read: */ NULL, - /* write: */ NULL, - /* ioctl: */ libcfs_ioctl, - /* open: */ libcfs_psdev_open, - /* release:*/ libcfs_psdev_release -}; - -cfs_psdev_t libcfs_dev = { - LIBCFS_MINOR, - "lnet", - &libcfs_fops -}; - diff --git a/lnet/libcfs/winnt/winnt-prim.c b/lnet/libcfs/winnt/winnt-prim.c deleted file mode 100644 index 064b071ecdff6782f3702dd2eb39aeeb11fd45dd..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-prim.c +++ /dev/null @@ -1,650 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. 
Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - - -/* - * Thread routines - */ - -/* - * cfs_thread_proc - * Lustre thread procedure wrapper routine (It's an internal routine) - * - * Arguments: - * context: a structure of cfs_thread_context_t, containing - * all the necessary parameters - * - * Return Value: - * void: N/A - * - * Notes: - * N/A - */ - -void -cfs_thread_proc( - void * context - ) -{ - cfs_thread_context_t * thread_context = - (cfs_thread_context_t *) context; - - /* Execute the specified function ... */ - - if (thread_context->func) { - (thread_context->func)(thread_context->arg); - } - - /* Free the context memory */ - - cfs_free(context); - - /* Terminate this system thread */ - - PsTerminateSystemThread(STATUS_SUCCESS); -} - -/* - * cfs_kernel_thread - * Create a system thread to execute the routine specified - * - * Arguments: - * func: function to be executed in the thread - * arg: argument transferred to func function - * flag: thread creation flags. 
- * - * Return Value: - * int: 0 on success or error codes - * - * Notes: - * N/A - */ - -int cfs_kernel_thread(int (*func)(void *), void *arg, int flag) -{ - cfs_handle_t thread = NULL; - NTSTATUS status; - cfs_thread_context_t * context = NULL; - - /* Allocate the context to be transferred to system thread */ - - context = cfs_alloc(sizeof(cfs_thread_context_t), CFS_ALLOC_ZERO); - - if (!context) { - return -ENOMEM; - } - - context->func = func; - context->arg = arg; - - /* Create system thread with the cfs_thread_proc wrapper */ - - status = PsCreateSystemThread( - &thread, - (ACCESS_MASK)0L, - 0, 0, 0, - cfs_thread_proc, - context); - - if (!NT_SUCCESS(status)) { - - - cfs_free(context); - - /* We need translate the nt status to linux error code */ - - return cfs_error_code(status); - } - - // - // Query the thread id of the newly created thread - // - - ZwClose(thread); - - return 0; -} - - -/* - * Symbols routines - */ - - -static CFS_DECL_RWSEM(cfs_symbol_lock); -CFS_LIST_HEAD(cfs_symbol_list); - -int MPSystem = FALSE; - -/* - * cfs_symbol_get - * To query the specified symbol form the symbol table - * - * Arguments: - * name: the symbol name to be queried - * - * Return Value: - * If the symbol is in the table, return the address of it. - * If not, return NULL. 
- * - * Notes: - * N/A - */ - -void * -cfs_symbol_get(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - sym->ref ++; - break; - } - } - up_read(&cfs_symbol_lock); - - if (sym != NULL) - return sym->value; - - return NULL; -} - -/* - * cfs_symbol_put - * To decrease the reference of the specified symbol - * - * Arguments: - * name: the symbol name to be dereferred - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_symbol_put(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - LASSERT(sym->ref > 0); - sym->ref--; - break; - } - } - up_read(&cfs_symbol_lock); - - LASSERT(sym != NULL); -} - - -/* - * cfs_symbol_register - * To register the specified symbol infromation - * - * Arguments: - * name: the symbol name to be dereferred - * value: the value that the symbol stands for - * - * Return Value: - * N/A - * - * Notes: - * Zero: Succeed to register - * Non-Zero: Fail to register the symbol - */ - -int -cfs_symbol_register(const char *name, const void *value) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - struct cfs_symbol *new = NULL; - - new = cfs_alloc(sizeof(struct cfs_symbol), CFS_ALLOC_ZERO); - if (!new) { - return (-ENOMEM); - } - strncpy(new->name, name, CFS_SYMBOL_LEN); - new->value = (void *)value; - new->ref = 0; - CFS_INIT_LIST_HEAD(&new->sym_list); - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - up_write(&cfs_symbol_lock); - cfs_free(new); - return 0; // alreay registerred - } - } - 
list_add_tail(&new->sym_list, &cfs_symbol_list); - up_write(&cfs_symbol_lock); - - return 0; -} - -/* - * cfs_symbol_unregister - * To unregister/remove the specified symbol - * - * Arguments: - * name: the symbol name to be dereferred - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_symbol_unregister(const char *name) -{ - struct list_head *walker; - struct list_head *nxt; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each_safe(walker, nxt, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - cfs_free(sym); - break; - } - } - up_write(&cfs_symbol_lock); -} - -/* - * cfs_symbol_clean - * To clean all the symbols - * - * Arguments: - * N/A - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_symbol_clean() -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - cfs_free(sym); - } - up_write(&cfs_symbol_lock); - return; -} - - - -/* - * Timer routines - */ - - -/* Timer dpc procedure */ - -static void -cfs_timer_dpc_proc ( - IN PKDPC Dpc, - IN PVOID DeferredContext, - IN PVOID SystemArgument1, - IN PVOID SystemArgument2) -{ - cfs_timer_t * timer; - KIRQL Irql; - - timer = (cfs_timer_t *) DeferredContext; - - /* clear the flag */ - KeAcquireSpinLock(&(timer->Lock), &Irql); - cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); - KeReleaseSpinLock(&(timer->Lock), Irql); - - /* call the user specified timer procedure */ - timer->proc((unsigned long)(timer->arg)); -} - -/* - * cfs_timer_init - * To initialize the cfs_timer_t - * - * Arguments: - * timer: the cfs_timer to be initialized - * func: the timer callback procedure - * arg: argument for the callback proc - * - * Return Value: - * 
N/A - * - * Notes: - * N/A - */ - -void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg) -{ - memset(timer, 0, sizeof(cfs_timer_t)); - - timer->proc = func; - timer->arg = arg; - - KeInitializeSpinLock(&(timer->Lock)); - KeInitializeTimer(&timer->Timer); - KeInitializeDpc (&timer->Dpc, cfs_timer_dpc_proc, timer); - - cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_INITED); -} - -/* - * cfs_timer_done - * To finialize the cfs_timer_t (unused) - * - * Arguments: - * timer: the cfs_timer to be cleaned up - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_timer_done(cfs_timer_t *timer) -{ - return; -} - -/* - * cfs_timer_arm - * To schedule the timer while touching @deadline - * - * Arguments: - * timer: the cfs_timer to be freed - * dealine: timeout value to wake up the timer - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline) -{ - LARGE_INTEGER timeout; - KIRQL Irql; - - KeAcquireSpinLock(&(timer->Lock), &Irql); - if (!cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)){ - - timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline; - - if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc )) { - cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); - } - - timer->deadline = deadline; - } - - KeReleaseSpinLock(&(timer->Lock), Irql); -} - -/* - * cfs_timer_disarm - * To discard the timer to be scheduled - * - * Arguments: - * timer: the cfs_timer to be discarded - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_timer_disarm(cfs_timer_t *timer) -{ - KIRQL Irql; - - KeAcquireSpinLock(&(timer->Lock), &Irql); - KeCancelTimer(&(timer->Timer)); - cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); - KeReleaseSpinLock(&(timer->Lock), Irql); -} - - -/* - * cfs_timer_is_armed - * To check the timer is scheduled or not - * - * Arguments: - * timer: the cfs_timer to be checked - * - * Return Value: - * 1: if it's armed. - * 0: if it's not. 
- * - * Notes: - * N/A - */ - -int cfs_timer_is_armed(cfs_timer_t *timer) -{ - int rc = 0; - KIRQL Irql; - - KeAcquireSpinLock(&(timer->Lock), &Irql); - if (cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)) { - rc = 1; - } - KeReleaseSpinLock(&(timer->Lock), Irql); - - return rc; -} - -/* - * cfs_timer_deadline - * To query the deadline of the timer - * - * Arguments: - * timer: the cfs_timer to be queried - * - * Return Value: - * the deadline value - * - * Notes: - * N/A - */ - -cfs_time_t cfs_timer_deadline(cfs_timer_t * timer) -{ - return timer->deadline; -} - -/* - * daemonize routine stub - */ - -void cfs_daemonize(char *str) -{ - return; -} - -/* - * routine related with sigals - */ - -cfs_sigset_t cfs_get_blockedsigs() -{ - return 0; -} - -cfs_sigset_t cfs_block_allsigs() -{ - return 0; -} - -cfs_sigset_t cfs_block_sigs(sigset_t bit) -{ - return 0; -} - -void cfs_restore_sigs(cfs_sigset_t old) -{ -} - -int cfs_signal_pending(void) -{ - return 0; -} - -void cfs_clear_sigpending(void) -{ - return; -} - -/** - ** Initialize routines - **/ - -int -libcfs_arch_init(void) -{ - int rc; - - spinlock_t lock; - /* Workground to check the system is MP build or UP build */ - spin_lock_init(&lock); - spin_lock(&lock); - MPSystem = (int)lock.lock; - /* MP build system: it's a real spin, for UP build system, it - only raises the IRQL to DISPATCH_LEVEL */ - spin_unlock(&lock); - - /* create slab memory caches for page alloctors */ - cfs_page_t_slab = cfs_mem_cache_create( - "CPGT", sizeof(cfs_page_t), 0, 0 ); - - cfs_page_p_slab = cfs_mem_cache_create( - "CPGP", CFS_PAGE_SIZE, 0, 0 ); - - if ( cfs_page_t_slab == NULL || - cfs_page_p_slab == NULL ){ - rc = -ENOMEM; - goto errorout; - } - - rc = init_task_manager(); - - if (rc != 0) { - cfs_enter_debugger(); - KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n")); - goto errorout; - } - - /* initialize the proc file system */ - rc = proc_init_fs(); - - if (rc != 0) { - cfs_enter_debugger(); - 
KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n")); - cleanup_task_manager(); - goto errorout; - } - - /* initialize the tdi data */ - rc = ks_init_tdi_data(); - - if (rc != 0) { - cfs_enter_debugger(); - KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n")); - proc_destroy_fs(); - cleanup_task_manager(); - goto errorout; - } - -errorout: - - if (rc != 0) { - /* destroy the taskslot cache slab */ - if (cfs_page_t_slab) { - cfs_mem_cache_destroy(cfs_page_t_slab); - } - if (cfs_page_p_slab) { - cfs_mem_cache_destroy(cfs_page_p_slab); - } - } - - return rc; -} - -void -libcfs_arch_cleanup(void) -{ - /* finialize the tdi data */ - ks_fini_tdi_data(); - - /* detroy the whole proc fs tree and nodes */ - proc_destroy_fs(); - - /* destroy the taskslot cache slab */ - if (cfs_page_t_slab) { - cfs_mem_cache_destroy(cfs_page_t_slab); - } - - if (cfs_page_p_slab) { - cfs_mem_cache_destroy(cfs_page_p_slab); - } - - return; -} - -EXPORT_SYMBOL(libcfs_arch_init); -EXPORT_SYMBOL(libcfs_arch_cleanup); diff --git a/lnet/libcfs/winnt/winnt-proc.c b/lnet/libcfs/winnt/winnt-proc.c deleted file mode 100644 index cfb8d38176ee04ad6b0b87aa81a683eaef1cf1ba..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-proc.c +++ /dev/null @@ -1,2039 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -#ifdef __KERNEL__ - - -/* - * /proc emulator routines ... - */ - -/* The root node of the proc fs emulation: /proc */ -cfs_proc_entry_t * proc_fs_root = NULL; - - -/* The sys root: /proc/sys */ -cfs_proc_entry_t * proc_sys_root = NULL; - - -/* The sys root: /proc/dev | to implement misc device */ - -cfs_proc_entry_t * proc_dev_root = NULL; - - -/* SLAB object for cfs_proc_entry_t allocation */ - -cfs_mem_cache_t * proc_entry_cache = NULL; - -/* root node for sysctl table */ - -cfs_sysctl_table_header_t root_table_header; - -/* The global lock to protect all the access */ - -#if LIBCFS_PROCFS_SPINLOCK -spinlock_t proc_fs_lock; - -#define INIT_PROCFS_LOCK() spin_lock_init(&proc_fs_lock) -#define LOCK_PROCFS() spin_lock(&proc_fs_lock) -#define UNLOCK_PROCFS() spin_unlock(&proc_fs_lock) - -#else - -mutex_t proc_fs_lock; - -#define INIT_PROCFS_LOCK() init_mutex(&proc_fs_lock) -#define LOCK_PROCFS() mutex_down(&proc_fs_lock) -#define UNLOCK_PROCFS() mutex_up(&proc_fs_lock) - -#endif - -static ssize_t -proc_file_read(struct file * file, const char * buf, size_t nbytes, loff_t *ppos) -{ - char *page; - ssize_t retval=0; - int eof=0; - ssize_t n, count; - char *start; - cfs_proc_entry_t * dp; - - dp = (cfs_proc_entry_t *) file->private_data; - if (!(page = (char*) cfs_alloc(CFS_PAGE_SIZE, 0))) - return -ENOMEM; - - while ((nbytes > 0) && !eof) { - - count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); - - start = NULL; - if (dp->read_proc) { - n = dp->read_proc( page, &start, (long)*ppos, - count, &eof, dp->data); - } else - break; - - if (!start) { - /* - * For proc files that are less than 4k - */ - start = page 
+ *ppos; - n -= (ssize_t)(*ppos); - if (n <= 0) - break; - if (n > count) - n = count; - } - if (n == 0) - break; /* End of file */ - if (n < 0) { - if (retval == 0) - retval = n; - break; - } - - n -= copy_to_user((void *)buf, start, n); - if (n == 0) { - if (retval == 0) - retval = -EFAULT; - break; - } - - *ppos += n; - nbytes -= n; - buf += n; - retval += n; - } - cfs_free(page); - - return retval; -} - -static ssize_t -proc_file_write(struct file * file, const char * buffer, - size_t count, loff_t *ppos) -{ - cfs_proc_entry_t * dp; - - dp = (cfs_proc_entry_t *) file->private_data; - - if (!dp->write_proc) - return -EIO; - - /* FIXME: does this routine need ppos? probably... */ - return dp->write_proc(file, buffer, count, dp->data); -} - -struct file_operations proc_file_operations = { - /*lseek:*/ NULL, //proc_file_lseek, - /*read:*/ proc_file_read, - /*write:*/ proc_file_write, - /*ioctl:*/ NULL, - /*open:*/ NULL, - /*release:*/ NULL -}; - -/* allocate proc entry block */ - -cfs_proc_entry_t * -proc_alloc_entry() -{ - cfs_proc_entry_t * entry = NULL; - - entry = cfs_mem_cache_alloc(proc_entry_cache, 0); - if (!entry) { - return NULL; - } - - memset(entry, 0, sizeof(cfs_proc_entry_t)); - - entry->magic = CFS_PROC_ENTRY_MAGIC; - RtlInitializeSplayLinks(&(entry->s_link)); - entry->proc_fops = &proc_file_operations; - - return entry; -} - -/* free the proc entry block */ - -void -proc_free_entry(cfs_proc_entry_t * entry) - -{ - ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC); - - cfs_mem_cache_free(proc_entry_cache, entry); -} - -/* dissect the path string for a given full proc path */ - -void -proc_dissect_name( - char *path, - char **first, - int *first_len, - char **remain - ) -{ - int i = 0, j = 0, len = 0; - - *first = *remain = NULL; - *first_len = 0; - - len = strlen(path); - - while (i < len && (path[i] == '/')) i++; - - if (i < len) { - - *first = path + i; - while (i < len && (path[i] != '/')) i++; - *first_len = (path + i - *first); - - if (i + 1 < len) { 
- *remain = path + i + 1; - } - } -} - -/* search the children entries of the parent entry */ - -cfs_proc_entry_t * -proc_search_splay ( - cfs_proc_entry_t * parent, - char * name - ) -{ - cfs_proc_entry_t * node; - PRTL_SPLAY_LINKS link; - - ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); - - link = parent->root; - - while (link) { - - ANSI_STRING ename,nname; - long result; - - node = CONTAINING_RECORD(link, cfs_proc_entry_t, s_link); - - ASSERT(node->magic == CFS_PROC_ENTRY_MAGIC); - - /* Compare the prefix in the tree with the full name */ - - RtlInitAnsiString(&ename, name); - RtlInitAnsiString(&nname, node->name); - - result = RtlCompareString(&nname, &ename,TRUE); - - if (result > 0) { - - /* The prefix is greater than the full name - so we go down the left child */ - - link = RtlLeftChild(link); - - } else if (result < 0) { - - /* The prefix is less than the full name - so we go down the right child */ - // - - link = RtlRightChild(link); - - } else { - - /* We got the entry in the splay tree and - make it root node instead */ - - parent->root = RtlSplay(link); - - return node; - } - - /* we need continue searching down the tree ... 
*/ - } - - /* There's no the exptected entry in the splay tree */ - - return NULL; -} - -int -proc_insert_splay ( - cfs_proc_entry_t * parent, - cfs_proc_entry_t * child - ) -{ - cfs_proc_entry_t * entry; - - ASSERT(parent != NULL && child != NULL); - ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); - - if (!parent->root) { - parent->root = &(child->s_link); - } else { - entry = CONTAINING_RECORD(parent->root, cfs_proc_entry_t, s_link); - while (TRUE) { - long result; - ANSI_STRING ename, cname; - - ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC); - - RtlInitAnsiString(&ename, entry->name); - RtlInitAnsiString(&cname, child->name); - - result = RtlCompareString(&ename, &cname,TRUE); - - if (result == 0) { - cfs_enter_debugger(); - if (entry == child) { - break; - } - return FALSE; - } - - if (result > 0) { - if (RtlLeftChild(&entry->s_link) == NULL) { - RtlInsertAsLeftChild(&entry->s_link, &child->s_link); - break; - } else { - entry = CONTAINING_RECORD( RtlLeftChild(&entry->s_link), - cfs_proc_entry_t, s_link); - } - } else { - if (RtlRightChild(&entry->s_link) == NULL) { - RtlInsertAsRightChild(&entry->s_link, &child->s_link); - break; - } else { - entry = CONTAINING_RECORD( RtlRightChild(&entry->s_link), - cfs_proc_entry_t, s_link ); - } - } - } - } - - cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED); - parent->nlink++; - - return TRUE; -} - - -/* remove a child entry from the splay tree */ -int -proc_remove_splay ( - cfs_proc_entry_t * parent, - cfs_proc_entry_t * child - ) -{ - cfs_proc_entry_t * entry = NULL; - - ASSERT(parent != NULL && child != NULL); - ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); - ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED)); - - entry = proc_search_splay(parent, child->name); - - if (entry) { - 
ASSERT(entry == child); - parent->root = RtlDelete(&(entry->s_link)); - parent->nlink--; - } else { - cfs_enter_debugger(); - return FALSE; - } - - return TRUE; -} - - -/* search a node inside the proc fs tree */ - -cfs_proc_entry_t * -proc_search_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t * entry; - cfs_proc_entry_t * parent; - char *first, *remain; - int flen; - char *ename = NULL; - - parent = root; - entry = NULL; - - ename = cfs_alloc(0x21, CFS_ALLOC_ZERO); - - if (ename == NULL) { - goto errorout; - } - -again: - - /* dissect the file name string */ - proc_dissect_name(name, &first, &flen, &remain); - - if (first) { - - if (flen >= 0x20) { - cfs_enter_debugger(); - entry = NULL; - goto errorout; - } - - memset(ename, 0, 0x20); - memcpy(ename, first, flen); - - entry = proc_search_splay(parent, ename); - - if (!entry) { - goto errorout; - } - - if (remain) { - name = remain; - parent = entry; - - goto again; - } - } - -errorout: - - if (ename) { - cfs_free(ename); - } - - return entry; -} - -/* insert the path nodes to the proc fs tree */ - -cfs_proc_entry_t * -proc_insert_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t *entry; - cfs_proc_entry_t *parent; - char *first, *remain; - int flen; - char ename[0x20]; - - parent = root; - entry = NULL; - -again: - - proc_dissect_name(name, &first, &flen, &remain); - - if (first) { - - if (flen >= 0x20) { - return NULL; - } - - memset(ename, 0, 0x20); - memcpy(ename, first, flen); - - entry = proc_search_splay(parent, ename); - - if (!entry) { - entry = proc_alloc_entry(); - memcpy(entry->name, ename, flen); - - if (entry) { - if(!proc_insert_splay(parent, entry)) { - proc_free_entry(entry); - entry = NULL; - } - } - } - - if (!entry) { - return NULL; - } - - if (remain) { - entry->mode |= S_IFDIR | S_IRUGO | S_IXUGO; - cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY); - name = remain; - parent = entry; - goto again; - } - } - - return entry; -} - -/* remove 
the path nodes from the proc fs tree */ - -void -proc_remove_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t *entry; - char *first, *remain; - int flen; - char ename[0x20]; - - entry = NULL; - - proc_dissect_name(name, &first, &flen, &remain); - - if (first) { - - memset(ename, 0, 0x20); - memcpy(ename, first, flen); - - entry = proc_search_splay(root, ename); - - if (entry) { - - if (remain) { - ASSERT(S_ISDIR(entry->mode)); - proc_remove_entry(remain, entry); - } - - if (!entry->nlink) { - proc_remove_splay(root, entry); - proc_free_entry(entry); - } - } - } else { - cfs_enter_debugger(); - } -} - -/* create proc entry and insert it into the proc fs */ - -cfs_proc_entry_t * -create_proc_entry ( - char * name, - mode_t mode, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t *parent = root; - cfs_proc_entry_t *entry = NULL; - - if (S_ISDIR(mode)) { - if ((mode & S_IALLUGO) == 0) - mode |= S_IRUGO | S_IXUGO; - } else { - if ((mode & S_IFMT) == 0) - mode |= S_IFREG; - if ((mode & S_IALLUGO) == 0) - mode |= S_IRUGO; - } - - LOCK_PROCFS(); - - ASSERT(NULL != proc_fs_root); - - if (!parent) { - parent = proc_fs_root; - } - - entry = proc_search_entry(name, parent); - - if (!entry) { - entry = proc_insert_entry(name, parent); - if (!entry) { - /* Failed to create/insert the splay node ... */ - cfs_enter_debugger(); - goto errorout; - } - /* Initializing entry ... 
*/ - entry->mode = mode; - - if (S_ISDIR(mode)) { - cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY); - } - } - -errorout: - - UNLOCK_PROCFS(); - - return entry; -} - - -/* search the specified entry form the proc fs */ - -cfs_proc_entry_t * -search_proc_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t * entry; - - LOCK_PROCFS(); - if (root == NULL) { - root = proc_fs_root; - } - entry = proc_search_entry(name, root); - UNLOCK_PROCFS(); - - return entry; -} - -/* remove the entry from the proc fs */ - -void -remove_proc_entry( - char * name, - cfs_proc_entry_t * parent - ) -{ - LOCK_PROCFS(); - if (parent == NULL) { - parent = proc_fs_root; - } - proc_remove_entry(name, parent); - UNLOCK_PROCFS(); -} - - -void proc_destroy_splay(cfs_proc_entry_t * entry) -{ - cfs_proc_entry_t * node; - - if (S_ISDIR(entry->mode)) { - - while (entry->root) { - node = CONTAINING_RECORD(entry->root, cfs_proc_entry_t, s_link); - entry->root = RtlDelete(&(node->s_link)); - proc_destroy_splay(node); - } - } - - proc_free_entry(entry); -} - - -/* destory the whole proc fs tree */ - -void proc_destroy_fs() -{ - LOCK_PROCFS(); - - if (proc_fs_root) { - proc_destroy_splay(proc_fs_root); - } - - if (proc_entry_cache) { - cfs_mem_cache_destroy(proc_entry_cache); - } - - UNLOCK_PROCFS(); -} - -/* initilaize / build the proc fs tree */ - -int proc_init_fs() -{ - cfs_proc_entry_t * root = NULL; - - memset(&(root_table_header), 0, sizeof(struct ctl_table_header)); - INIT_LIST_HEAD(&(root_table_header.ctl_entry)); - - INIT_PROCFS_LOCK(); - proc_entry_cache = cfs_mem_cache_create( - NULL, - sizeof(cfs_proc_entry_t), - 0, - 0 - ); - - if (!proc_entry_cache) { - return (-ENOMEM); - } - - root = proc_alloc_entry(); - - if (!root) { - proc_destroy_fs(); - return (-ENOMEM); - } - - root->magic = CFS_PROC_ENTRY_MAGIC; - root->flags = CFS_PROC_FLAG_DIRECTORY; - root->mode = S_IFDIR | S_IRUGO | S_IXUGO; - root->nlink = 3; // root should never be deleted. 
- - root->name[0]='p'; - root->name[1]='r'; - root->name[2]='o'; - root->name[3]='c'; - - proc_fs_root = root; - - proc_sys_root = create_proc_entry("sys", S_IFDIR, root); - - if (!proc_sys_root) { - proc_free_entry(root); - proc_fs_root = NULL; - proc_destroy_fs(); - return (-ENOMEM); - } - - proc_sys_root->nlink = 1; - - proc_dev_root = create_proc_entry("dev", S_IFDIR, root); - - if (!proc_dev_root) { - proc_free_entry(proc_sys_root); - proc_sys_root = NULL; - proc_free_entry(proc_fs_root); - proc_fs_root = NULL; - proc_destroy_fs(); - return (-ENOMEM); - } - - proc_dev_root->nlink = 1; - - return 0; -} - - -static ssize_t do_rw_proc(int write, struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - int op; - cfs_proc_entry_t *de; - struct ctl_table *table; - size_t res; - ssize_t error; - - de = (cfs_proc_entry_t *) file->proc_dentry; - - if (!de || !de->data) - return -ENOTDIR; - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - return -ENOTDIR; - op = (write ? 002 : 004); - -// if (ctl_perm(table, op)) -// return -EPERM; - - res = count; - - /* - * FIXME: we need to pass on ppos to the handler. 
- */ - - error = (*table->proc_handler) (table, write, file, buf, &res); - if (error) - return error; - return res; -} - -static ssize_t proc_readsys(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - return do_rw_proc(0, file, buf, count, ppos); -} - -static ssize_t proc_writesys(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - return do_rw_proc(1, file, (char *) buf, count, ppos); -} - - -struct file_operations proc_sys_file_operations = { - /*lseek:*/ NULL, - /*read:*/ proc_readsys, - /*write:*/ proc_writesys, - /*ioctl:*/ NULL, - /*open:*/ NULL, - /*release:*/ NULL -}; - - -/* Scan the sysctl entries in table and add them all into /proc */ -void register_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t * root) -{ - cfs_proc_entry_t * de; - int len; - mode_t mode; - - for (; table->ctl_name; table++) { - /* Can't do anything without a proc name. */ - if (!table->procname) - continue; - /* Maybe we can't do anything with it... */ - if (!table->proc_handler && !table->child) { - printk(KERN_WARNING "SYSCTL: Can't register %s\n", - table->procname); - continue; - } - - len = strlen(table->procname); - mode = table->mode; - - de = NULL; - if (table->proc_handler) - mode |= S_IFREG; - else { - de = search_proc_entry(table->procname, root); - if (de) { - break; - } - /* If the subdir exists already, de is non-NULL */ - } - - if (!de) { - - de = create_proc_entry((char *)table->procname, mode, root); - if (!de) - continue; - de->data = (void *) table; - if (table->proc_handler) { - de->proc_fops = &proc_sys_file_operations; - } - } - table->de = de; - if (de->mode & S_IFDIR) - register_proc_table(table->child, de); - } -} - - -/* - * Unregister a /proc sysctl table and any subdirectories. 
- */ -void unregister_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t *root) -{ - cfs_proc_entry_t *de; - for (; table->ctl_name; table++) { - if (!(de = table->de)) - continue; - if (de->mode & S_IFDIR) { - if (!table->child) { - printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); - continue; - } - unregister_proc_table(table->child, de); - - /* Don't unregister directories which still have entries.. */ - if (de->nlink) - continue; - } - - /* Don't unregister proc entries that are still being used.. */ - if (de->nlink) - continue; - - table->de = NULL; - remove_proc_entry((char *)table->procname, root); - } -} - -/* The generic string strategy routine: */ -int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context) -{ - int l, len; - - if (!table->data || !table->maxlen) - return -ENOTDIR; - - if (oldval && oldlenp) { - if(get_user(len, oldlenp)) - return -EFAULT; - if (len) { - l = strlen(table->data); - if (len > l) len = l; - if (len >= table->maxlen) - len = table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(0, ((char *) oldval) + len)) - return -EFAULT; - if(put_user(len, oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > table->maxlen) - len = table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - if (len == table->maxlen) - len--; - ((char *) table->data)[len] = 0; - } - return 0; -} - -/** - * simple_strtoul - convert a string to an unsigned long - * @cp: The start of the string - * @endp: A pointer to the end of the parsed string will be placed here - * @base: The number base to use - */ -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) -{ - unsigned long result = 0, value; - - if (!base) { - base = 10; - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { - cp++; - base = 16; - } - } - } - while 
(isxdigit(*cp) && - (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { - result = result*base + value; - cp++; - } - if (endp) - *endp = (char *)cp; - return result; -} - -#define OP_SET 0 -#define OP_AND 1 -#define OP_OR 2 -#define OP_MAX 3 -#define OP_MIN 4 - - -static int do_proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp, int conv, int op) -{ - int *i, vleft, first=1, neg, val; - size_t left, len; - - #define TMPBUFLEN 20 - char buf[TMPBUFLEN], *p; - - if (!table->data || !table->maxlen || !*lenp) - { - *lenp = 0; - return 0; - } - - i = (int *) table->data; - vleft = table->maxlen / sizeof(int); - left = *lenp; - - for (; left && vleft--; i++, first=0) { - if (write) { - while (left) { - char c; - if(get_user(c,(char *) buffer)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - ((char *) buffer)++; - } - if (!left) - break; - neg = 0; - len = left; - if (len > TMPBUFLEN-1) - len = TMPBUFLEN-1; - if(copy_from_user(buf, buffer, len)) - return -EFAULT; - buf[len] = 0; - p = buf; - if (*p == '-' && left > 1) { - neg = 1; - left--, p++; - } - if (*p < '0' || *p > '9') - break; - val = simple_strtoul(p, &p, 0) * conv; - len = p-buf; - if ((len < left) && *p && !isspace(*p)) - break; - if (neg) - val = -val; - (char *)buffer += len; - left -= len; - switch(op) { - case OP_SET: *i = val; break; - case OP_AND: *i &= val; break; - case OP_OR: *i |= val; break; - case OP_MAX: if(*i < val) - *i = val; - break; - case OP_MIN: if(*i > val) - *i = val; - break; - } - } else { - p = buf; - if (!first) - *p++ = '\t'; - sprintf(p, "%d", (*i) / conv); - len = strlen(buf); - if (len > left) - len = left; - if(copy_to_user(buffer, buf, len)) - return -EFAULT; - left -= len; - (char *)buffer += len; - } - } - - if (!write && !first && left) { - if(put_user('\n', (char *) buffer)) - return -EFAULT; - left--, ((char *)buffer)++; - } - if (write) { - p = (char *) buffer; - while (left) { - char c; - 
if(get_user(c, p++)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - } - } - if (write && first) - return -EINVAL; - *lenp -= left; - memset(&(filp->f_pos) , 0, sizeof(loff_t)); - filp->f_pos += (loff_t)(*lenp); - return 0; -} - -/** - * proc_dointvec - read a vector of integers - * @table: the sysctl table - * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure - * @buffer: the user buffer - * @lenp: the size of the user buffer - * - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * - * Returns 0 on success. - */ -int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp) -{ - return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET); -} - - -/** - * proc_dostring - read a string sysctl - * @table: the sysctl table - * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure - * @buffer: the user buffer - * @lenp: the size of the user buffer - * - * Reads/writes a string from/to the user buffer. If the kernel - * buffer provided is not large enough to hold the string, the - * string is truncated. The copied string is %NULL-terminated. - * If the string is being read by the user process, it is copied - * and a newline '\n' is added. It is truncated if the buffer is - * not large enough. - * - * Returns 0 on success. 
- */ -int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp) -{ - size_t len; - char *p, c; - - if (!table->data || !table->maxlen || !*lenp || - (filp->f_pos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - len = 0; - p = buffer; - while (len < *lenp) { - if(get_user(c, p++)) - return -EFAULT; - if (c == 0 || c == '\n') - break; - len++; - } - if (len >= (size_t)table->maxlen) - len = (size_t)table->maxlen-1; - if(copy_from_user(table->data, buffer, len)) - return -EFAULT; - ((char *) table->data)[len] = 0; - filp->f_pos += *lenp; - } else { - len = (size_t)strlen(table->data); - if (len > (size_t)table->maxlen) - len = (size_t)table->maxlen; - if (len > *lenp) - len = *lenp; - if (len) - if(copy_to_user(buffer, table->data, len)) - return -EFAULT; - if (len < *lenp) { - if(put_user('\n', ((char *) buffer) + len)) - return -EFAULT; - len++; - } - *lenp = len; - filp->f_pos += len; - } - return 0; -} - -/* Perform the actual read/write of a sysctl table entry. 
*/ -int do_sysctl_strategy (cfs_sysctl_table_t *table, - int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context) -{ - int op = 0, rc; - size_t len; - - if (oldval) - op |= 004; - if (newval) - op |= 002; - - if (table->strategy) { - rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen, context); - if (rc < 0) - return rc; - if (rc > 0) - return 0; - } - - /* If there is no strategy routine, or if the strategy returns - * zero, proceed with automatic r/w */ - if (table->data && table->maxlen) { - if (oldval && oldlenp) { - get_user(len, oldlenp); - if (len) { - if (len > (size_t)table->maxlen) - len = (size_t)table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(len, oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > (size_t)table->maxlen) - len = (size_t)table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - } - } - return 0; -} - -static int parse_table(int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, - cfs_sysctl_table_t *table, void **context) -{ - int n; - -repeat: - - if (!nlen) - return -ENOTDIR; - if (get_user(n, name)) - return -EFAULT; - for ( ; table->ctl_name; table++) { - if (n == table->ctl_name || table->ctl_name == CTL_ANY) { - int error; - if (table->child) { -/* - if (ctl_perm(table, 001)) - return -EPERM; -*/ - if (table->strategy) { - error = table->strategy( - table, name, nlen, - oldval, oldlenp, - newval, newlen, context); - if (error) - return error; - } - name++; - nlen--; - table = table->child; - goto repeat; - } - error = do_sysctl_strategy(table, name, nlen, - oldval, oldlenp, - newval, newlen, context); - return error; - } - } - return -ENOTDIR; -} - -int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, - void *newval, size_t newlen) -{ - struct list_head *tmp; - - if (nlen <= 0 || nlen >= CTL_MAXNAME) - return -ENOTDIR; 
- if (oldval) { - int old_len; - if (!oldlenp || get_user(old_len, oldlenp)) - return -EFAULT; - } - tmp = &root_table_header.ctl_entry; - do { - struct ctl_table_header *head = - list_entry(tmp, struct ctl_table_header, ctl_entry); - void *context = NULL; - int error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, head->ctl_table, - &context); - if (context) - cfs_free(context); - if (error != -ENOTDIR) - return error; - tmp = tmp->next; - } while (tmp != &root_table_header.ctl_entry); - return -ENOTDIR; -} - -/** - * register_sysctl_table - register a sysctl heirarchy - * @table: the top-level table structure - * @insert_at_head: whether the entry should be inserted in front or at the end - * - * Register a sysctl table heirarchy. @table should be a filled in ctl_table - * array. An entry with a ctl_name of 0 terminates the table. - * - * The members of the &ctl_table structure are used as follows: - * - * ctl_name - This is the numeric sysctl value used by sysctl(2). The number - * must be unique within that level of sysctl - * - * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not - * enter a sysctl file - * - * data - a pointer to data for use by proc_handler - * - * maxlen - the maximum size in bytes of the data - * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) - * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. - * - * proc_handler - the text handler routine (described below) - * - * strategy - the strategy routine (described below) - * - * de - for internal use by the sysctl routines - * - * extra1, extra2 - extra pointers usable by the proc handler routines - * - * Leaf nodes in the sysctl tree will be represented by a single file - * under /proc; non-leaf nodes will be represented by directories. - * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. 
The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * More sophisticated management can be enabled by the provision of a - * strategy routine with the table entry. This will be called before - * any automatic read or write of the data is performed. - * - * The strategy routine may return - * - * < 0 - Error occurred (error is passed to user process) - * - * 0 - OK - proceed with automatic read or write. - * - * > 0 - OK - read or write has been done by the strategy routine, so - * return immediately. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - - * - * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), - * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(), - * proc_doulongvec_minmax() - * - * It is the handler's job to read the input buffer from user memory - * and process it. The handler should return 0 on success. - * - * This routine returns %NULL on a failure to register, and a pointer - * to the table header on success. - */ -struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table, - int insert_at_head) -{ - struct ctl_table_header *tmp; - tmp = cfs_alloc(sizeof(struct ctl_table_header), 0); - if (!tmp) - return NULL; - tmp->ctl_table = table; - - INIT_LIST_HEAD(&tmp->ctl_entry); - if (insert_at_head) - list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); - else - list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); -#ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); -#endif - return tmp; -} - -/** - * unregister_sysctl_table - unregister a sysctl table heirarchy - * @header: the header returned from register_sysctl_table - * - * Unregisters the sysctl table and all children. 
proc entries may not - * actually be removed until they are no longer used by anyone. - */ -void unregister_sysctl_table(struct ctl_table_header * header) -{ - list_del(&header->ctl_entry); -#ifdef CONFIG_PROC_FS - unregister_proc_table(header->ctl_table, proc_sys_root); -#endif - cfs_free(header); -} - - -int cfs_psdev_register(cfs_psdev_t * psdev) -{ - cfs_proc_entry_t * entry; - - entry = create_proc_entry ( - (char *)psdev->name, - S_IFREG, - proc_dev_root - ); - - if (!entry) { - return -ENOMEM; - } - - entry->flags |= CFS_PROC_FLAG_MISCDEV; - - entry->proc_fops = psdev->fops; - entry->data = (void *)psdev; - - return 0; -} - -int cfs_psdev_deregister(cfs_psdev_t * psdev) -{ - cfs_proc_entry_t * entry; - - entry = search_proc_entry ( - (char *)psdev->name, - proc_dev_root - ); - - if (entry) { - - ASSERT(entry->data == (void *)psdev); - ASSERT(entry->flags & CFS_PROC_FLAG_MISCDEV); - - remove_proc_entry( - (char *)psdev->name, - proc_dev_root - ); - } - - return 0; -} - -extern char debug_file_path[1024]; - -#define PSDEV_LNET (0x100) -enum { - PSDEV_DEBUG = 1, /* control debugging */ - PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ - PSDEV_PRINTK, /* force all messages to console */ - PSDEV_CONSOLE_RATELIMIT, /* rate limit console messages */ - PSDEV_DEBUG_PATH, /* crashdump log location */ - PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ - PSDEV_LIBCFS_MEMUSED, /* bytes currently PORTAL_ALLOCated */ -}; - -static struct ctl_table lnet_table[] = { - {PSDEV_DEBUG, "debug", &libcfs_debug, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &libcfs_subsystem_debug, - sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit, - sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, - sizeof(debug_file_path), 0644, NULL, &proc_dostring, 
&sysctl_string}, -/* - {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, - sizeof(portals_upcall), 0644, NULL, &proc_dostring, - &sysctl_string}, -*/ - {PSDEV_LIBCFS_MEMUSED, "memused", (int *)&libcfs_kmemory.counter, - sizeof(int), 0644, NULL, &proc_dointvec}, - {0} -}; - -static struct ctl_table top_table[2] = { - {PSDEV_LNET, "lnet", NULL, 0, 0555, lnet_table}, - {0} -}; - - -int trace_write_dump_kernel(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int rc = trace_dump_debug_buffer_usrstr(buffer, count); - - return (rc < 0) ? rc : count; -} - -int trace_write_daemon_file(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int rc = trace_daemon_command_usrstr(buffer, count); - - return (rc < 0) ? rc : count; -} - -int trace_read_daemon_file(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - int rc; - - tracefile_read_lock(); - - rc = trace_copyout_string(page, count, tracefile, "\n"); - - tracefile_read_unlock(); - - return rc; -} - -int trace_write_debug_mb(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int rc = trace_set_debug_mb_userstr(buffer, count); - - return (rc < 0) ? 
rc : count; -} - -int trace_read_debug_mb(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - char str[32]; - - snprintf(str, sizeof(str), "%d\n", trace_get_debug_mb()); - - return trace_copyout_string(page, count, str, NULL); -} - -int insert_proc(void) -{ - cfs_proc_entry_t *ent; - - ent = create_proc_entry("sys/lnet/dump_kernel", 0, NULL); - if (ent == NULL) { - CERROR(("couldn't register dump_kernel\n")); - return -1; - } - ent->write_proc = trace_write_dump_kernel; - - ent = create_proc_entry("sys/lnet/daemon_file", 0, NULL); - if (ent == NULL) { - CERROR(("couldn't register daemon_file\n")); - return -1; - } - ent->write_proc = trace_write_daemon_file; - ent->read_proc = trace_read_daemon_file; - - ent = create_proc_entry("sys/lnet/debug_mb", 0, NULL); - if (ent == NULL) { - CERROR(("couldn't register debug_mb\n")); - return -1; - } - ent->write_proc = trace_write_debug_mb; - ent->read_proc = trace_read_debug_mb; - - return 0; -} - -void remove_proc(void) -{ - remove_proc_entry("sys/portals/dump_kernel", NULL); - remove_proc_entry("sys/portals/daemon_file", NULL); - remove_proc_entry("sys/portals/debug_mb", NULL); - -#ifdef CONFIG_SYSCTL - if (portals_table_header) - unregister_sysctl_table(portals_table_header); - portals_table_header = NULL; -#endif -} - - -/* - * proc process routines of kernel space - */ - -cfs_file_t * -lustre_open_file(char * filename) -{ - int rc = 0; - cfs_file_t * fh = NULL; - cfs_proc_entry_t * fp = NULL; - - fp = search_proc_entry(filename, proc_fs_root); - - if (!fp) { - rc = -ENOENT; - return NULL; - } - - fh = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO); - - if (!fh) { - rc = -ENOMEM; - return NULL; - } - - fh->private_data = (void *)fp; - fh->f_op = fp->proc_fops; - - if (fh->f_op->open) { - rc = (fh->f_op->open)(fh); - } else { - fp->nlink++; - } - - if (0 != rc) { - cfs_free(fh); - return NULL; - } - - return fh; -} - -int -lustre_close_file(cfs_file_t * fh) -{ - int rc = 0; - cfs_proc_entry_t * fp 
= NULL; - - fp = (cfs_proc_entry_t *) fh->private_data; - - if (fh->f_op->release) { - rc = (fh->f_op->release)(fh); - } else { - fp->nlink--; - } - - cfs_free(fh); - - return rc; -} - -int -lustre_do_ioctl( cfs_file_t * fh, - unsigned long cmd, - ulong_ptr arg ) -{ - int rc = 0; - - if (fh->f_op->ioctl) { - rc = (fh->f_op->ioctl)(fh, cmd, arg); - } - - if (rc != 0) { - printk("lustre_do_ioctl: fialed: cmd = %xh arg = %xh rc = %d\n", - cmd, arg, rc); - } - - return rc; -} - -int -lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl) -{ - int rc = 0; - ulong_ptr data; - - data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL); - - /* obd ioctl code */ - if (_IOC_TYPE(devctl->cmd) == 'f') { -#if 0 - struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data; - - if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE && - devctl->cmd != (ULONG)OBD_IOC_BRW_READ ) { - - unsigned long off = obd->ioc_len; - - if (obd->ioc_pbuf1) { - obd->ioc_pbuf1 = (char *)(data + off); - off += size_round(obd->ioc_plen1); - } - - if (obd->ioc_pbuf2) { - obd->ioc_pbuf2 = (char *)(data + off); - } - } - #endif - } - - rc = lustre_do_ioctl(fh, devctl->cmd, data); - - return rc; -} - - -size_t -lustre_read_file( - cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ) -{ - size_t rc = 0; - - if (fh->f_op->read) { - rc = (fh->f_op->read) (fh, buf, size, &off); - } - - return rc; -} - - -size_t -lustre_write_file( - cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ) -{ - size_t rc = 0; - - if (fh->f_op->write) { - rc = (fh->f_op->write)(fh, buf, size, &off); - } - - return rc; -} - -#else /* !__KERNEL__ */ - -#include <lnet/api-support.h> -#include <liblustre.h> -#include <lustre_lib.h> - -/* - * proc process routines of user space - */ - -HANDLE cfs_proc_open (char * filename, int oflag) -{ - NTSTATUS status; - IO_STATUS_BLOCK iosb; - int rc; - - HANDLE FileHandle = INVALID_HANDLE_VALUE; - OBJECT_ATTRIBUTES ObjectAttributes; - ACCESS_MASK DesiredAccess; - ULONG CreateDisposition; 
- ULONG ShareAccess; - ULONG CreateOptions; - UNICODE_STRING UnicodeName; - USHORT NameLength; - - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; - - /* Check the filename: should start with "/proc" or "/dev" */ - NameLength = (USHORT)strlen(filename); - if (NameLength > 0x05) { - if (_strnicmp(filename, "/proc/", 6) == 0) { - filename += 6; - NameLength -=6; - if (NameLength <= 0) { - rc = -EINVAL; - goto errorout; - } - } else if (_strnicmp(filename, "/dev/", 5) == 0) { - } else { - rc = -EINVAL; - goto errorout; - } - } else { - rc = -EINVAL; - goto errorout; - } - - /* Analyze the flags settings */ - - if (cfs_is_flag_set(oflag, O_WRONLY)) { - DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = 0; - } else if (cfs_is_flag_set(oflag, O_RDWR)) { - DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; - } else { - DesiredAccess = (GENERIC_READ | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ; - } - - if (cfs_is_flag_set(oflag, O_CREAT)) { - if (cfs_is_flag_set(oflag, O_EXCL)) { - CreateDisposition = FILE_CREATE; - rc = -EINVAL; - goto errorout; - } else { - CreateDisposition = FILE_OPEN_IF; - } - } else { - CreateDisposition = FILE_OPEN; - } - - if (cfs_is_flag_set(oflag, O_TRUNC)) { - if (cfs_is_flag_set(oflag, O_EXCL)) { - CreateDisposition = FILE_OVERWRITE; - } else { - CreateDisposition = FILE_OVERWRITE_IF; - } - } - - CreateOptions = 0; - - if (cfs_is_flag_set(oflag, O_DIRECTORY)) { - cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); - } - - if (cfs_is_flag_set(oflag, O_SYNC)) { - cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); - } - - if (cfs_is_flag_set(oflag, O_DIRECT)) { - cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); - } - - /* Initialize the unicode path name for the specified file */ - RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK); - - /* Setup the object attributes structure for the file. 
*/ - InitializeObjectAttributes( - &ObjectAttributes, - &UnicodeName, - OBJ_CASE_INSENSITIVE, - NULL, - NULL ); - - /* building EA for the proc entry ... */ - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = (UCHAR)NameLength; - Ea->EaValueLength = 0; - RtlCopyMemory( - &(Ea->EaName), - filename, - NameLength + 1 - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + - Ea->EaNameLength + 1; - - /* Now to open or create the file now */ - status = ZwCreateFile( - &FileHandle, - DesiredAccess, - &ObjectAttributes, - &iosb, - 0, - FILE_ATTRIBUTE_NORMAL, - ShareAccess, - CreateDisposition, - CreateOptions, - Ea, - EaLength ); - - /* Check the returned status of Iosb ... */ - - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - goto errorout; - } - -errorout: - - return FileHandle; -} - -int cfs_proc_close(HANDLE handle) -{ - if (handle) { - NtClose((HANDLE)handle); - } - - return 0; -} - -int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count) -{ - NTSTATUS status; - IO_STATUS_BLOCK iosb; - LARGE_INTEGER offset; - - - offset.QuadPart = 0; - - /* read file data */ - status = NtReadFile( - (HANDLE)handle, - 0, - NULL, - NULL, - &iosb, - buffer, - count, - &offset, - NULL); - - /* check the return status */ - if (!NT_SUCCESS(status)) { - printf("NtReadFile request failed 0x%0x\n", status); - goto errorout; - } - -errorout: - - if (NT_SUCCESS(status)) { - return iosb.Information; - } - - return cfs_error_code(status); -} - - -int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count) -{ - NTSTATUS status; - IO_STATUS_BLOCK iosb; - LARGE_INTEGER offset; - - offset.QuadPart = -1; - - /* write buffer to the opened file */ - status = NtWriteFile( - (HANDLE)handle, - 0, - NULL, - NULL, - &iosb, - buffer, - count, - &offset, - NULL); - - /* check the return status */ - if (!NT_SUCCESS(status)) { - printf("NtWriteFile request failed 0x%0x\n", status); - goto errorout; - } - -errorout: - - if 
(NT_SUCCESS(status)) { - return iosb.Information; - } - - return cfs_error_code(status); -} - -int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer) -{ - PUCHAR procdat = NULL; - CFS_PROC_IOCTL procctl; - ULONG length = 0; - ULONG extra = 0; - - NTSTATUS status; - IO_STATUS_BLOCK iosb; - - procctl.cmd = cmd; - - if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) { - struct libcfs_ioctl_data * portal; - portal = (struct libcfs_ioctl_data *) buffer; - length = portal->ioc_len; - } else if (_IOC_TYPE(cmd) == 'f') { - struct obd_ioctl_data * obd; - obd = (struct obd_ioctl_data *) buffer; - length = obd->ioc_len; - extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2); - } else if(_IOC_TYPE(cmd) == 'u') { - length = 4; - extra = 0; - } else { - printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n"); - cfs_enter_debugger(); - status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - procctl.len = length + extra; - procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL)); - - if (NULL == procdat) { - printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n"); - status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL)); - memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL)); - memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length); - length += sizeof(CFS_PROC_IOCTL); - - if (_IOC_TYPE(cmd) == 'f') { - - char *ptr; - struct obd_ioctl_data * data; - struct obd_ioctl_data * obd; - - data = (struct obd_ioctl_data *) buffer; - obd = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL)); - ptr = obd->ioc_bulk; - - if (data->ioc_inlbuf1) { - obd->ioc_inlbuf1 = ptr; - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - } - - if (data->ioc_inlbuf2) { - obd->ioc_inlbuf2 = ptr; - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - } - if (data->ioc_inlbuf3) { - obd->ioc_inlbuf3 = ptr; - LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - } - if (data->ioc_inlbuf4) { - 
obd->ioc_inlbuf4 = ptr; - LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - } - - if ( cmd != (ULONG)OBD_IOC_BRW_WRITE && - cmd != (ULONG)OBD_IOC_BRW_READ ) { - - if (data->ioc_pbuf1 && data->ioc_plen1) { - obd->ioc_pbuf1 = &procdat[length]; - memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, data->ioc_plen1); - length += size_round(data->ioc_plen1); - } - - if (data->ioc_pbuf2 && data->ioc_plen2) { - obd->ioc_pbuf2 = &procdat[length]; - memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2); - length += size_round(data->ioc_plen2); - } - } - - if (obd_ioctl_is_invalid(obd)) { - cfs_enter_debugger(); - } - } - - status = NtDeviceIoControlFile( - (HANDLE)handle, - NULL, NULL, NULL, &iosb, - IOCTL_LIBCFS_ENTRY, - procdat, length, - procdat, length ); - - - if (NT_SUCCESS(status)) { - memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); - } - -errorout: - - if (procdat) { - free(procdat); - } - - return cfs_error_code(status); -} - -#endif /* __KERNEL__ */ diff --git a/lnet/libcfs/winnt/winnt-sync.c b/lnet/libcfs/winnt/winnt-sync.c deleted file mode 100644 index 5094befbf148c4670befaeab62adb26eac52680a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-sync.c +++ /dev/null @@ -1,449 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#define DEBUG_SUBSYSTEM S_LIBCFS - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - - -/* - * Wait queue routines - */ - -/* - * cfs_waitq_init - * To initialize the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_init(cfs_waitq_t *waitq) -{ - waitq->magic = CFS_WAITQ_MAGIC; - waitq->flags = 0; - INIT_LIST_HEAD(&(waitq->waiters)); - spin_lock_init(&(waitq->guard)); -} - -/* - * cfs_waitlink_init - * To initialize the wake link node - * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitlink_init(cfs_waitlink_t *link) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - - memset(link, 0, sizeof(cfs_waitlink_t)); - - link->magic = CFS_WAITLINK_MAGIC; - link->flags = 0; - - link->event = &(slot->Event); - link->hits = &(slot->hits); - - atomic_inc(&slot->count); - - INIT_LIST_HEAD(&(link->waitq[0].link)); - INIT_LIST_HEAD(&(link->waitq[1].link)); - - link->waitq[0].waitl = link->waitq[1].waitl = link; -} - - -/* - * cfs_waitlink_fini - * To finilize the wake link node - * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitlink_fini(cfs_waitlink_t *link) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - 
cfs_assert(link->magic == CFS_WAITLINK_MAGIC); - cfs_assert(link->waitq[0].waitq == NULL); - cfs_assert(link->waitq[1].waitq == NULL); - - atomic_dec(&slot->count); -} - - -/* - * cfs_waitq_add_internal - * To queue the wait link node to the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * int: queue no (Normal or Forward waitq) - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_add_internal(cfs_waitq_t *waitq, - cfs_waitlink_t *link, - __u32 waitqid ) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - LASSERT(link->magic == CFS_WAITLINK_MAGIC); - LASSERT(waitqid < CFS_WAITQ_CHANNELS); - - spin_lock(&(waitq->guard)); - LASSERT(link->waitq[waitqid].waitq == NULL); - link->waitq[waitqid].waitq = waitq; - if (link->flags & CFS_WAITQ_EXCLUSIVE) { - list_add_tail(&link->waitq[waitqid].link, &waitq->waiters); - } else { - list_add(&link->waitq[waitqid].link, &waitq->waiters); - } - spin_unlock(&(waitq->guard)); -} -/* - * cfs_waitq_add - * To queue the wait link node to the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_add(cfs_waitq_t *waitq, - cfs_waitlink_t *link) -{ - cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_NORMAL); -} - -/* - * cfs_waitq_add_exclusive - * To set the wait link node to exclusive mode - * and queue it to the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_wait_link structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_add_exclusive( cfs_waitq_t *waitq, - cfs_waitlink_t *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - LASSERT(link->magic == CFS_WAITLINK_MAGIC); - - link->flags |= 
CFS_WAITQ_EXCLUSIVE; - cfs_waitq_add(waitq, link); -} - -/* - * cfs_waitq_forward - * To be determinated. - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_forward( cfs_waitlink_t *link, - cfs_waitq_t *waitq) -{ - cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_FORWARD); -} - -/* - * cfs_waitq_del - * To remove the wait link node from the waitq - * - * Arguments: - * waitq: pointer to the cfs_ waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_del( cfs_waitq_t *waitq, - cfs_waitlink_t *link) -{ - int i = 0; - - LASSERT(waitq != NULL); - LASSERT(link != NULL); - - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - LASSERT(link->magic == CFS_WAITLINK_MAGIC); - - spin_lock(&(waitq->guard)); - - for (i=0; i < CFS_WAITQ_CHANNELS; i++) { - if (link->waitq[i].waitq == waitq) - break; - } - - if (i < CFS_WAITQ_CHANNELS) { - link->waitq[i].waitq = NULL; - list_del_init(&link->waitq[i].link); - } else { - cfs_enter_debugger(); - } - - spin_unlock(&(waitq->guard)); -} - -/* - * cfs_waitq_active - * Is the waitq active (not empty) ? - * - * Arguments: - * waitq: pointer to the cfs_ waitq_t structure - * - * Return Value: - * Zero: the waitq is empty - * Non-Zero: the waitq is active - * - * Notes: - * We always returns TRUE here, the same to Darwin. 
- */ - -int cfs_waitq_active(cfs_waitq_t *waitq) -{ - LASSERT(waitq != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - - return (1); -} - -/* - * cfs_waitq_signal_nr - * To wake up all the non-exclusive tasks plus nr exclusive - * ones in the waitq - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * nr: number of exclusive tasks to be woken up - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - - -void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr) -{ - int result; - cfs_waitlink_channel_t * scan; - - LASSERT(waitq != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - - spin_lock(&waitq->guard); - - list_for_each_entry(scan, &waitq->waiters, cfs_waitlink_channel_t, link) { - - cfs_waitlink_t *waitl = scan->waitl; - - result = cfs_wake_event(waitl->event); - LASSERT( result == FALSE || result == TRUE ); - - if (result) { - atomic_inc(waitl->hits); - } - - if ((waitl->flags & CFS_WAITQ_EXCLUSIVE) && --nr == 0) - break; - } - - spin_unlock(&waitq->guard); - return; -} - -/* - * cfs_waitq_signal - * To wake up all the non-exclusive tasks and 1 exclusive - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_signal(cfs_waitq_t *waitq) -{ - cfs_waitq_signal_nr(waitq, 1); -} - - -/* - * cfs_waitq_broadcast - * To wake up all the tasks in the waitq - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_broadcast(cfs_waitq_t *waitq) -{ - LASSERT(waitq != NULL); - LASSERT(waitq->magic ==CFS_WAITQ_MAGIC); - - cfs_waitq_signal_nr(waitq, 0); -} - -/* - * cfs_waitq_wait - * To wait on the link node until it is signaled. 
- * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state) -{ - LASSERT(link != NULL); - LASSERT(link->magic == CFS_WAITLINK_MAGIC); - - if (atomic_read(link->hits) > 0) { - atomic_dec(link->hits); - LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); - } else { - cfs_wait_event(link->event, 0); - } -} - -/* - * cfs_waitq_timedwait - * To wait the link node to be signaled with a timeout limit - * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * timeout: the timeout limitation - * - * Return Value: - * Woken up: return the difference of the current time and - * the timeout - * Timeout: return 0 - * - * Notes: - * What if it happens to be woken up at the just timeout time !? - */ - -cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link, - cfs_task_state_t state, - cfs_duration_t timeout) -{ - - if (atomic_read(link->hits) > 0) { - atomic_dec(link->hits); - LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); - return TRUE; - } - - return (cfs_duration_t)cfs_wait_event(link->event, timeout); -} - - diff --git a/lnet/libcfs/winnt/winnt-tcpip.c b/lnet/libcfs/winnt/winnt-tcpip.c deleted file mode 100644 index d0c725cdc3fdeeffb919875f4175bf0cb9d7163b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-tcpip.c +++ /dev/null @@ -1,6706 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LIBCFS - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include <lnet/lnet.h> - -#define TDILND_MODULE_NAME L"Tdilnd" - -ks_data_t ks_data; - -ULONG -ks_tdi_send_flags(ULONG SockFlags) -{ - ULONG TdiFlags = 0; - - if (cfs_is_flag_set(SockFlags, MSG_OOB)) { - cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED); - } - - if (cfs_is_flag_set(SockFlags, MSG_MORE)) { - cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL); - } - - if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) { - cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING); - } - - return TdiFlags; -} - -NTSTATUS -KsIrpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - if (NULL != Context) { - KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE); - } - - return STATUS_MORE_PROCESSING_REQUIRED; - - UNREFERENCED_PARAMETER(DeviceObject); - UNREFERENCED_PARAMETER(Irp); -} - - -/* - * KsBuildTdiIrp - * Allocate a new IRP and initialize it to be issued to tdi - * - * Arguments: - * DeviceObject: device object created by the underlying - * TDI transport driver - * - * Return Value: - * PRIP: the allocated Irp in success or NULL in failure. - * - * NOTES: - * N/A - */ - -PIRP -KsBuildTdiIrp( - IN PDEVICE_OBJECT DeviceObject - ) -{ - PIRP Irp; - PIO_STACK_LOCATION IrpSp; - - // - // Allocating the IRP ... - // - - Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); - - if (NULL != Irp) { - - // - // Getting the Next Stack Location ... - // - - IrpSp = IoGetNextIrpStackLocation(Irp); - - // - // Initializing Irp ... 
- // - - IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL; - IrpSp->Parameters.DeviceIoControl.IoControlCode = 0; - } - - return Irp; -} - -/* - * KsSubmitTdiIrp - * Issue the Irp to the underlying tdi driver - * - * Arguments: - * DeviceObject: the device object created by TDI driver - * Irp: the I/O request packet to be processed - * bSynchronous: synchronous or not. If true, we need wait - * until the process is finished. - * Information: returned info - * - * Return Value: - * NTSTATUS: kernel status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsSubmitTdiIrp( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN BOOLEAN bSynchronous, - OUT PULONG Information - ) -{ - NTSTATUS Status; - KEVENT Event; - - if (bSynchronous) { - - KeInitializeEvent( - &Event, - SynchronizationEvent, - FALSE - ); - - - IoSetCompletionRoutine( - Irp, - KsIrpCompletionRoutine, - &Event, - TRUE, - TRUE, - TRUE - ); - } - - Status = IoCallDriver(DeviceObject, Irp); - - if (bSynchronous) { - - if (STATUS_PENDING == Status) { - - Status = KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); - } - - Status = Irp->IoStatus.Status; - - if (Information) { - *Information = (ULONG)(Irp->IoStatus.Information); - } - - Irp->MdlAddress = NULL; - IoFreeIrp(Irp); - } - - if (!NT_SUCCESS(Status)) { - - KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n", - Status, KsNtStatusToString(Status))); - } - - return (Status); -} - - - -/* - * KsOpenControl - * Open the Control Channel Object ... 
- * - * Arguments: - * DeviceName: the device name to be opened - * Handle: opened handle in success case - * FileObject: the fileobject of the device - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsOpenControl( - IN PUNICODE_STRING DeviceName, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; - - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Initializing ... - // - - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Creating the Transport Address Object ... - // - - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - FILE_SHARE_READ | FILE_SHARE_WRITE, - FILE_OPEN, - 0, - NULL, - 0 - ); - - - if (NT_SUCCESS(Status)) { - - // - // Now Obtaining the FileObject of the Transport Address ... 
- // - - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - ZwClose(*Handle); - } - - } else { - - cfs_enter_debugger(); - } - - return (Status); -} - - -/* - * KsCloseControl - * Release the Control Channel Handle and FileObject - * - * Arguments: - * Handle: the channel handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseControl( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - if (FileObject) { - - ObDereferenceObject(FileObject); - } - - if (Handle) { - - Status = ZwClose(Handle); - } - - ASSERT(NT_SUCCESS(Status)); - - return (Status); -} - - -/* - * KsOpenAddress - * Open the tdi address object - * - * Arguments: - * DeviceName: device name of the address object - * pAddress: tdi address of the address object - * AddressLength: length in bytes of the tdi address - * Handle: the newly opened handle - * FileObject: the newly opened fileobject - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsOpenAddress( - IN PUNICODE_STRING DeviceName, - IN PTRANSPORT_ADDRESS pAddress, - IN ULONG AddressLength, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; - - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; - - // - // Building EA for the Address Object to be Opened ... 
- // - - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH; - Ea->EaValueLength = (USHORT)AddressLength; - RtlCopyMemory( - &(Ea->EaName), - TdiTransportAddress, - Ea->EaNameLength + 1 - ); - RtlMoveMemory( - &(Ea->EaName[Ea->EaNameLength + 1]), - pAddress, - AddressLength - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) + - Ea->EaNameLength + AddressLength; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - - // - // Initializing ... - // - - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Creating the Transport Address Object ... - // - - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */ - FILE_OPEN, - 0, - Ea, - EaLength - ); - - - if (NT_SUCCESS(Status)) { - - // - // Now Obtaining the FileObject of the Transport Address ... 
- // - - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - ZwClose(*Handle); - } - - } else { - - cfs_enter_debugger(); - } - - return (Status); -} - -/* - * KsCloseAddress - * Release the Hanlde and FileObject of an opened tdi - * address object - * - * Arguments: - * Handle: the handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseAddress( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject -) -{ - NTSTATUS Status = STATUS_SUCCESS; - - if (FileObject) { - - ObDereferenceObject(FileObject); - } - - if (Handle) { - - Status = ZwClose(Handle); - } - - ASSERT(NT_SUCCESS(Status)); - - return (Status); -} - - -/* - * KsOpenConnection - * Open a tdi connection object - * - * Arguments: - * DeviceName: device name of the connection object - * ConnectionContext: the connection context - * Handle: the newly opened handle - * FileObject: the newly opened fileobject - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsOpenConnection( - IN PUNICODE_STRING DeviceName, - IN CONNECTION_CONTEXT ConnectionContext, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; - - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; - - // - // Building EA for the Address Object to be Opened ... 
- // - - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH; - Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT); - RtlCopyMemory( - &(Ea->EaName), - TdiConnectionContext, - Ea->EaNameLength + 1 - ); - RtlMoveMemory( - &(Ea->EaName[Ea->EaNameLength + 1]), - &ConnectionContext, - sizeof(CONNECTION_CONTEXT) - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + - Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - - // - // Initializing ... - // - - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Creating the Connection Object ... - // - - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - NULL, - FILE_ATTRIBUTE_NORMAL, - 0, - FILE_OPEN, - 0, - Ea, - EaLength - ); - - - if (NT_SUCCESS(Status)) { - - // - // Now Obtaining the FileObject of the Transport Address ... 
- // - - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - ZwClose(*Handle); - } - - } else { - - cfs_enter_debugger(); - } - - return (Status); -} - -/* - * KsCloseConnection - * Release the Hanlde and FileObject of an opened tdi - * connection object - * - * Arguments: - * Handle: the handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseConnection( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - if (FileObject) { - - ObDereferenceObject(FileObject); - } - - if (Handle) { - - Status = ZwClose(Handle); - } - - ASSERT(NT_SUCCESS(Status)); - - return (Status); -} - - -/* - * KsAssociateAddress - * Associate an address object with a connection object - * - * Arguments: - * AddressHandle: the handle of the address object - * ConnectionObject: the FileObject of the connection - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsAssociateAddress( - IN HANDLE AddressHandle, - IN PFILE_OBJECT ConnectionObject - ) -{ - NTSTATUS Status; - PDEVICE_OBJECT DeviceObject; - PIRP Irp; - - // - // Getting the DeviceObject from Connection FileObject - // - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - // - // Building Tdi Internal Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Assocating the Address Object with the Connection Object - // - - TdiBuildAssociateAddress( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - AddressHandle - ); - - // - // Calling the Transprot Driver with the Prepared Irp - // - - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); - } - - return (Status); -} - - -/* - * KsDisassociateAddress - * Disassociate the connection object (the relationship will - * the corresponding address object will be dismissed. ) - * - * Arguments: - * ConnectionObject: the FileObject of the connection - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsDisassociateAddress( - IN PFILE_OBJECT ConnectionObject - ) -{ - NTSTATUS Status; - PDEVICE_OBJECT DeviceObject; - PIRP Irp; - - // - // Getting the DeviceObject from Connection FileObject - // - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - // - // Building Tdi Internal Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Disassocating the Address Object with the Connection Object - // - - TdiBuildDisassociateAddress( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL - ); - - // - // Calling the Transprot Driver with the Prepared Irp - // - - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); - } - - return (Status); -} - - -/* - -// -// Connection Control Event Callbacks -// - -TDI_EVENT_CONNECT -TDI_EVENT_DISCONNECT -TDI_EVENT_ERROR - -// -// Tcp Event Callbacks -// - -TDI_EVENT_RECEIVE -TDI_EVENT_RECEIVE_EXPEDITED -TDI_EVENT_CHAINED_RECEIVE -TDI_EVENT_CHAINED_RECEIVE_EXPEDITED - -// -// Udp Event Callbacks -// - -TDI_EVENT_RECEIVE_DATAGRAM -TDI_EVENT_CHAINED_RECEIVE_DATAGRAM - -*/ - - -/* - * KsSetEventHandlers - * Set the tdi event callbacks with an address object - * - * Arguments: - * AddressObject: the FileObject of the address object - * EventContext: the parameter for the callbacks - * Handlers: the handlers indictor array - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * NOTES: - * N/A - */ - -NTSTATUS -KsSetEventHandlers( - IN PFILE_OBJECT AddressObject, // Address File Object - IN PVOID EventContext, // Context for Handlers - IN PKS_EVENT_HANDLERS Handlers // Handlers Indictor - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - PDEVICE_OBJECT DeviceObject; - USHORT i = 0; - - DeviceObject = IoGetRelatedDeviceObject(AddressObject); - - for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) { - - // - // Setup the tdi event callback handler if requested. - // - - if (Handlers->IsActive[i]) { - - PIRP Irp; - - // - // Building Tdi Internal Irp ... - // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Building the Irp to set the Event Handler ... 
- // - - TdiBuildSetEventHandler( - Irp, - DeviceObject, - AddressObject, - NULL, - NULL, - i, /* tdi event type */ - Handlers->Handler[i], /* tdi event handler */ - EventContext /* context for the handler */ - ); - - // - // Calling the Transprot Driver with the Prepared Irp - // - - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); - - // - // tcp/ip tdi does not support these two event callbacks - // - - if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE || - i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) { - cfs_enter_debugger(); - Status = STATUS_SUCCESS; - } - } - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - } - } - - -errorout: - - if (!NT_SUCCESS(Status)) { - - KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n", - Status, KsNtStatusToString(Status) )); - } - - return (Status); -} - - - -/* - * KsQueryAddressInfo - * Query the address of the FileObject specified - * - * Arguments: - * FileObject: the FileObject to be queried - * AddressInfo: buffer to contain the address info - * AddressSize: length of the AddressInfo buffer - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsQueryAddressInfo( - PFILE_OBJECT FileObject, - PTDI_ADDRESS_INFO AddressInfo, - PULONG AddressSize - ) -{ - NTSTATUS Status = STATUS_UNSUCCESSFUL; - PIRP Irp = NULL; - PMDL Mdl; - PDEVICE_OBJECT DeviceObject; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - RtlZeroMemory(AddressInfo, *(AddressSize)); - - // - // Allocating the Tdi Setting Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Locking the User Buffer / Allocating a MDL for it - // - - Status = KsLockUserBuffer( - AddressInfo, - FALSE, - *(AddressSize), - IoModifyAccess, - &Mdl - ); - - if (!NT_SUCCESS(Status)) { - - IoFreeIrp(Irp); - Irp = NULL; - } - } - - if (Irp) { - - LASSERT(NT_SUCCESS(Status)); - - TdiBuildQueryInformation( - Irp, - DeviceObject, - FileObject, - NULL, - NULL, - TDI_QUERY_ADDRESS_INFO, - Mdl - ); - - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - AddressSize - ); - - KsReleaseMdl(Mdl, FALSE); - } - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - //TDI_BUFFER_OVERFLOW - } - - return (Status); -} - -/* - * KsQueryProviderInfo - * Query the underlying transport device's information - * - * Arguments: - * TdiDeviceName: the transport device's name string - * ProviderInfo: TDI_PROVIDER_INFO struncture - * - * Return Value: - * NTSTATUS: Nt system status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsQueryProviderInfo( - PWSTR TdiDeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - PIRP Irp = NULL; - PMDL Mdl = NULL; - - UNICODE_STRING ControlName; - - HANDLE Handle; - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - - ULONG ProviderSize = 0; - - RtlInitUnicodeString(&ControlName, TdiDeviceName); - - // - // Open the Tdi Control Channel - // - - Status = KsOpenControl( - &ControlName, - &Handle, - &FileObject - ); - - if (!NT_SUCCESS(Status)) { - - KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n")); - return (Status); - } - - // - // Obtain The Related Device Object - // - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - ProviderSize = sizeof(TDI_PROVIDER_INFO); - RtlZeroMemory(ProviderInfo, ProviderSize); - - // - // Allocating the Tdi Setting Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Locking the User Buffer / Allocating a MDL for it - // - - Status = KsLockUserBuffer( - ProviderInfo, - FALSE, - ProviderSize, - IoModifyAccess, - &Mdl - ); - - if (!NT_SUCCESS(Status)) { - - IoFreeIrp(Irp); - Irp = NULL; - } - } - - if (Irp) { - - LASSERT(NT_SUCCESS(Status)); - - TdiBuildQueryInformation( - Irp, - DeviceObject, - FileObject, - NULL, - NULL, - TDI_QUERY_PROVIDER_INFO, - Mdl - ); - - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - &ProviderSize - ); - - KsReleaseMdl(Mdl, FALSE); - } - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - //TDI_BUFFER_OVERFLOW - } - - KsCloseControl(Handle, FileObject); - - return (Status); -} - -/* - * KsQueryConnectionInfo - * Query the connection info of the FileObject specified - * (some statics data of the traffic) - * - * Arguments: - * FileObject: the FileObject to be queried - * ConnectionInfo: buffer to contain the connection info - * ConnectionSize: length of the ConnectionInfo buffer - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * NOTES: - * N/A - */ - -NTSTATUS -KsQueryConnectionInfo( - PFILE_OBJECT ConnectionObject, - PTDI_CONNECTION_INFO ConnectionInfo, - PULONG ConnectionSize - ) -{ - NTSTATUS Status = STATUS_UNSUCCESSFUL; - PIRP Irp = NULL; - PMDL Mdl; - PDEVICE_OBJECT DeviceObject; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - RtlZeroMemory(ConnectionInfo, *(ConnectionSize)); - - // - // Allocating the Tdi Query Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Locking the User Buffer / Allocating a MDL for it - // - - Status = KsLockUserBuffer( - ConnectionInfo, - FALSE, - *(ConnectionSize), - IoModifyAccess, - &Mdl - ); - - if (NT_SUCCESS(Status)) { - - IoFreeIrp(Irp); - Irp = NULL; - } - } - - if (Irp) { - - LASSERT(NT_SUCCESS(Status)); - - TdiBuildQueryInformation( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - TDI_QUERY_CONNECTION_INFO, - Mdl - ); - - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - ConnectionSize - ); - - KsReleaseMdl(Mdl, FALSE); - } - - return (Status); -} - - -/* - * KsInitializeTdiAddress - * Initialize the tdi addresss - * - * Arguments: - * pTransportAddress: tdi address to be initialized - * IpAddress: the ip address of object - * IpPort: the ip port of the object - * - * Return Value: - * ULONG: the total size of the tdi address - * - * NOTES: - * N/A - */ - -ULONG -KsInitializeTdiAddress( - IN OUT PTA_IP_ADDRESS pTransportAddress, - IN ULONG IpAddress, - IN USHORT IpPort - ) -{ - pTransportAddress->TAAddressCount = 1; - pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP; - pTransportAddress->Address[ 0 ].AddressType = TDI_ADDRESS_TYPE_IP; - pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort; - pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr = IpAddress; - - return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP); -} - -/* - * KsQueryTdiAddressLength - * Query the total size of the tdi address - * - * Arguments: - * pTransportAddress: tdi address to be queried - * - * Return Value: - * ULONG: the total size of the tdi address - * - * NOTES: - * N/A - */ - -ULONG -KsQueryTdiAddressLength( - PTRANSPORT_ADDRESS pTransportAddress - ) -{ - ULONG TotalLength = 0; - LONG i; - - PTA_ADDRESS UNALIGNED pTaAddress = NULL; - - ASSERT (NULL != pTransportAddress); - - TotalLength = 
FIELD_OFFSET(TRANSPORT_ADDRESS, Address) + - FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount; - - pTaAddress = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address; - - for (i = 0; i < pTransportAddress->TAAddressCount; i++) - { - TotalLength += pTaAddress->AddressLength; - pTaAddress = (TA_ADDRESS UNALIGNED *)((PCHAR)pTaAddress + - FIELD_OFFSET(TA_ADDRESS,Address) + - pTaAddress->AddressLength ); - } - - return (TotalLength); -} - - -/* - * KsQueryIpAddress - * Query the ip address of the tdi object - * - * Arguments: - * FileObject: tdi object to be queried - * TdiAddress: TdiAddress buffer, to store the queried - * tdi ip address - * AddressLength: buffer length of the TdiAddress - * - * Return Value: - * ULONG: the total size of the tdi ip address - * - * NOTES: - * N/A - */ - -NTSTATUS -KsQueryIpAddress( - PFILE_OBJECT FileObject, - PVOID TdiAddress, - ULONG* AddressLength - ) -{ - NTSTATUS Status; - - PTDI_ADDRESS_INFO TdiAddressInfo; - ULONG Length; - - - // - // Maximum length of TDI_ADDRESSS_INFO with one TRANSPORT_ADDRESS - // - - Length = MAX_ADDRESS_LENGTH; - - TdiAddressInfo = (PTDI_ADDRESS_INFO) - ExAllocatePoolWithTag( - NonPagedPool, - Length, - 'KSAI' ); - - if (NULL == TdiAddressInfo) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - - Status = KsQueryAddressInfo( - FileObject, - TdiAddressInfo, - &Length - ); - -errorout: - - if (NT_SUCCESS(Status)) - { - if (*AddressLength < Length) { - - Status = STATUS_BUFFER_TOO_SMALL; - - } else { - - *AddressLength = Length; - RtlCopyMemory( - TdiAddress, - &(TdiAddressInfo->Address), - Length - ); - - Status = STATUS_SUCCESS; - } - - } else { - - } - - - if (NULL != TdiAddressInfo) { - - ExFreePool(TdiAddressInfo); - } - - return Status; -} - - -/* - * KsErrorEventHandler - * the common error event handler callback - * - * Arguments: - * TdiEventContext: should be the socket - * Status: the error code - * - * Return Value: - * Status: STATS_SUCCESS - * - * NOTES: - 
* We need not do anything in such a severe - * error case. System will process it for us. - */ - -NTSTATUS -KsErrorEventHandler( - IN PVOID TdiEventContext, - IN NTSTATUS Status - ) -{ - KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n", - KeGetCurrentIrql())); - - cfs_enter_debugger(); - - return (STATUS_SUCCESS); -} - - -/* - * ks_set_handlers - * setup all the event handler callbacks - * - * Arguments: - * tconn: the tdi connecton object - * - * Return Value: - * int: ks error code - * - * NOTES: - * N/A - */ - -int -ks_set_handlers( - ksock_tconn_t * tconn - ) -{ - NTSTATUS status = STATUS_SUCCESS; - KS_EVENT_HANDLERS handlers; - - /* to make sure the address object is opened already */ - if (tconn->kstc_addr.FileObject == NULL) { - goto errorout; - } - - /* initialize the handlers indictor array. for sender and listenr, - there are different set of callbacks. for child, we just return. */ - - memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); - - SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler); - SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler); - SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler); - SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler); - SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler); - - // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler); - - if (tconn->kstc_type == kstt_listener) { - SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler); - } else if (tconn->kstc_type == kstt_child) { - goto errorout; - } - - /* set all the event callbacks */ - status = KsSetEventHandlers( - tconn->kstc_addr.FileObject, /* Address File Object */ - tconn, /* Event Context */ - &handlers /* Event callback handlers */ - ); - -errorout: - - return cfs_error_code(status); -} - - -/* - * ks_reset_handlers - * disable all the event handler callbacks 
(set to NULL) - * - * Arguments: - * tconn: the tdi connecton object - * - * Return Value: - * int: ks error code - * - * NOTES: - * N/A - */ - -int -ks_reset_handlers( - ksock_tconn_t * tconn - ) -{ - NTSTATUS status = STATUS_SUCCESS; - KS_EVENT_HANDLERS handlers; - - /* to make sure the address object is opened already */ - if (tconn->kstc_addr.FileObject == NULL) { - goto errorout; - } - - /* initialize the handlers indictor array. for sender and listenr, - there are different set of callbacks. for child, we just return. */ - - memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); - - SetEventHandler(handlers, TDI_EVENT_ERROR, NULL); - SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL); - SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL); - SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL); - SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL); - // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL); - - if (tconn->kstc_type == kstt_listener) { - SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL); - } else if (tconn->kstc_type == kstt_child) { - goto errorout; - } - - /* set all the event callbacks */ - status = KsSetEventHandlers( - tconn->kstc_addr.FileObject, /* Address File Object */ - tconn, /* Event Context */ - &handlers /* Event callback handlers */ - ); - -errorout: - - return cfs_error_code(status); -} - - -/* - * KsAcceptCompletionRoutine - * Irp completion routine for TdiBuildAccept (KsConnectEventHandler) - * - * Here system gives us a chance to check the conneciton is built - * ready or not. - * - * Arguments: - * DeviceObject: the device object of the transport driver - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsAcceptCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - ksock_tconn_t * child = (ksock_tconn_t *) Context; - ksock_tconn_t * parent = child->child.kstc_parent; - - KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n", - KeGetCurrentIrql() )); - - KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n", - Context, Irp->IoStatus.Status)); - - LASSERT(child->kstc_type == kstt_child); - - spin_lock(&(child->kstc_lock)); - - LASSERT(parent->kstc_state == ksts_listening); - LASSERT(child->kstc_state == ksts_connecting); - - if (NT_SUCCESS(Irp->IoStatus.Status)) { - - child->child.kstc_accepted = TRUE; - - child->kstc_state = ksts_connected; - - /* wake up the daemon thread which waits on this event */ - KeSetEvent( - &(parent->listener.kstc_accept_event), - 0, - FALSE - ); - - spin_unlock(&(child->kstc_lock)); - - KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent)); - - } else { - - /* re-use this child connecton */ - child->child.kstc_accepted = FALSE; - child->child.kstc_busy = FALSE; - child->kstc_state = ksts_associated; - - spin_unlock(&(child->kstc_lock)); - } - - /* now free the Irp */ - IoFreeIrp(Irp); - - /* drop the refer count of the child */ - ks_put_tconn(child); - - return (STATUS_MORE_PROCESSING_REQUIRED); -} - - -/* - * ks_get_vacancy_backlog - * Get a vacancy listeing child from the backlog list - * - * Arguments: - * parent: the listener daemon connection - * - * Return Value: - * the child listening connection or NULL in failure - * - * Notes - * Parent's lock should be acquired before calling. 
- */ - -ksock_tconn_t * -ks_get_vacancy_backlog( - ksock_tconn_t * parent - ) -{ - ksock_tconn_t * child; - - LASSERT(parent->kstc_type == kstt_listener); - LASSERT(parent->kstc_state == ksts_listening); - - if (list_empty(&(parent->listener.kstc_listening.list))) { - - child = NULL; - - } else { - - struct list_head * tmp; - - /* check the listening queue and try to get a free connecton */ - - list_for_each(tmp, &(parent->listener.kstc_listening.list)) { - child = list_entry (tmp, ksock_tconn_t, child.kstc_link); - spin_lock(&(child->kstc_lock)); - - if (!child->child.kstc_busy) { - LASSERT(child->kstc_state == ksts_associated); - child->child.kstc_busy = TRUE; - spin_unlock(&(child->kstc_lock)); - break; - } else { - spin_unlock(&(child->kstc_lock)); - child = NULL; - } - } - } - - return child; -} - -ks_addr_slot_t * -KsSearchIpAddress(PUNICODE_STRING DeviceName) -{ - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - spin_lock(&ks_data.ksnd_addrs_lock); - - list = ks_data.ksnd_addrs_list.Flink; - while (list != &ks_data.ksnd_addrs_list) { - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - if (RtlCompareUnicodeString( - DeviceName, - &slot->devname, - TRUE) == 0) { - break; - } - list = list->Flink; - slot = NULL; - } - - spin_unlock(&ks_data.ksnd_addrs_lock); - - return slot; -} - -void -KsCleanupIpAddresses() -{ - spin_lock(&ks_data.ksnd_addrs_lock); - - while (!IsListEmpty(&ks_data.ksnd_addrs_list)) { - - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - list = RemoveHeadList(&ks_data.ksnd_addrs_list); - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - cfs_free(slot); - ks_data.ksnd_naddrs--; - } - - cfs_assert(ks_data.ksnd_naddrs == 0); - spin_unlock(&ks_data.ksnd_addrs_lock); -} - -VOID -KsAddAddressHandler( - IN PTA_ADDRESS Address, - IN PUNICODE_STRING DeviceName, - IN PTDI_PNP_CONTEXT Context - ) -{ - PTDI_ADDRESS_IP IpAddress = NULL; - - if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && - Address->AddressLength == 
TDI_ADDRESS_LENGTH_IP ) { - - ks_addr_slot_t * slot = NULL; - - IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; - KsPrint((1, "KsAddAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", - DeviceName, Context, IpAddress->in_addr, - (IpAddress->in_addr & 0xFF000000) >> 24, - (IpAddress->in_addr & 0x00FF0000) >> 16, - (IpAddress->in_addr & 0x0000FF00) >> 8, - (IpAddress->in_addr & 0x000000FF) >> 0 )); - - slot = KsSearchIpAddress(DeviceName); - - if (slot != NULL) { - slot->up = TRUE; - slot->ip_addr = ntohl(IpAddress->in_addr); - } else { - slot = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO); - if (slot != NULL) { - spin_lock(&ks_data.ksnd_addrs_lock); - InsertTailList(&ks_data.ksnd_addrs_list, &slot->link); - sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++); - slot->ip_addr = ntohl(IpAddress->in_addr); - slot->up = TRUE; - RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length); - slot->devname.Length = DeviceName->Length; - slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR); - slot->devname.Buffer = slot->buffer; - spin_unlock(&ks_data.ksnd_addrs_lock); - } - } - } -} - -VOID -KsDelAddressHandler( - IN PTA_ADDRESS Address, - IN PUNICODE_STRING DeviceName, - IN PTDI_PNP_CONTEXT Context - ) -{ - PTDI_ADDRESS_IP IpAddress = NULL; - - if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && - Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { - - ks_addr_slot_t * slot = NULL; - - slot = KsSearchIpAddress(DeviceName); - - if (slot != NULL) { - slot->up = FALSE; - } - - IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; - KsPrint((1, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", - DeviceName, Context, IpAddress->in_addr, - (IpAddress->in_addr & 0xFF000000) >> 24, - (IpAddress->in_addr & 0x00FF0000) >> 16, - (IpAddress->in_addr & 0x0000FF00) >> 8, - (IpAddress->in_addr & 0x000000FF) >> 0 )); - } -} - -NTSTATUS -KsRegisterPnpHandlers() -{ - TDI20_CLIENT_INTERFACE_INFO 
ClientInfo; - - /* initialize the global ks_data members */ - RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME); - spin_lock_init(&ks_data.ksnd_addrs_lock); - InitializeListHead(&ks_data.ksnd_addrs_list); - - /* register the pnp handlers */ - RtlZeroMemory(&ClientInfo, sizeof(ClientInfo)); - ClientInfo.TdiVersion = TDI_CURRENT_VERSION; - - ClientInfo.ClientName = &ks_data.ksnd_client_name; - ClientInfo.AddAddressHandlerV2 = KsAddAddressHandler; - ClientInfo.DelAddressHandlerV2 = KsDelAddressHandler; - - return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo), - &ks_data.ksnd_pnp_handle); -} - -VOID -KsDeregisterPnpHandlers() -{ - if (ks_data.ksnd_pnp_handle) { - - /* De-register the pnp handlers */ - - TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle); - ks_data.ksnd_pnp_handle = NULL; - - /* cleanup all the ip address slots */ - KsCleanupIpAddresses(); - } -} - -/* - * KsConnectEventHandler - * Connect event handler event handler, called by the underlying TDI - * transport in response to an incoming request to the listening daemon. - * - * it will grab a vacancy backlog from the children tconn list, and - * build an acception Irp with it, then transfer the Irp to TDI driver. - * - * Arguments: - * TdiEventContext: the tdi connnection object of the listening daemon - * ...... 
- * - * Return Value: - * Nt kernel status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsConnectEventHandler( - IN PVOID TdiEventContext, - IN LONG RemoteAddressLength, - IN PVOID RemoteAddress, - IN LONG UserDataLength, - IN PVOID UserData, - IN LONG OptionsLength, - IN PVOID Options, - OUT CONNECTION_CONTEXT * ConnectionContext, - OUT PIRP * AcceptIrp - ) -{ - ksock_tconn_t * parent; - ksock_tconn_t * child; - - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - NTSTATUS Status; - - PIRP Irp = NULL; - PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; - - KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql())); - parent = (ksock_tconn_t *) TdiEventContext; - - LASSERT(parent->kstc_type == kstt_listener); - - spin_lock(&(parent->kstc_lock)); - - if (parent->kstc_state == ksts_listening) { - - /* allocate a new ConnectionInfo to backup the peer's info */ - - ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( - NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + - RemoteAddressLength, 'iCsK' ); - - if (NULL == ConnectionInfo) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - - /* initializing ConnectionInfo structure ... 
*/ - - ConnectionInfo->UserDataLength = UserDataLength; - ConnectionInfo->UserData = UserData; - ConnectionInfo->OptionsLength = OptionsLength; - ConnectionInfo->Options = Options; - ConnectionInfo->RemoteAddressLength = RemoteAddressLength; - ConnectionInfo->RemoteAddress = ConnectionInfo + 1; - - RtlCopyMemory( - ConnectionInfo->RemoteAddress, - RemoteAddress, - RemoteAddressLength - ); - - /* get the vacancy listening child tdi connections */ - - child = ks_get_vacancy_backlog(parent); - - if (child) { - - spin_lock(&(child->kstc_lock)); - child->child.kstc_info.ConnectionInfo = ConnectionInfo; - child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress; - child->kstc_state = ksts_connecting; - spin_unlock(&(child->kstc_lock)); - - } else { - - KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent)); - - Status = STATUS_INSUFFICIENT_RESOURCES; - - goto errorout; - } - - FileObject = child->child.kstc_info.FileObject; - DeviceObject = IoGetRelatedDeviceObject (FileObject); - - Irp = KsBuildTdiIrp(DeviceObject); - - TdiBuildAccept( - Irp, - DeviceObject, - FileObject, - KsAcceptCompletionRoutine, - child, - NULL, - NULL - ); - - IoSetNextIrpStackLocation(Irp); - - /* grap the refer of the child tdi connection */ - ks_get_tconn(child); - - Status = STATUS_MORE_PROCESSING_REQUIRED; - - *AcceptIrp = Irp; - *ConnectionContext = child; - - } else { - - Status = STATUS_CONNECTION_REFUSED; - goto errorout; - } - - spin_unlock(&(parent->kstc_lock)); - - return Status; - -errorout: - - spin_unlock(&(parent->kstc_lock)); - - { - *AcceptIrp = NULL; - *ConnectionContext = NULL; - - if (ConnectionInfo) { - - ExFreePool(ConnectionInfo); - } - - if (Irp) { - - IoFreeIrp (Irp); - } - } - - return Status; -} - -/* - * KsDisconnectCompletionRoutine - * the Irp completion routine for TdiBuildDisconect - * - * We just signal the event and return MORE_PRO... to - * let the caller take the responsibility of the Irp. 
- * - * Arguments: - * DeviceObject: the device object of the transport - * Irp: the Irp is being completed. - * Context: the event specified by the caller - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsDisconectCompletionRoutine ( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - - KeSetEvent((PKEVENT) Context, 0, FALSE); - - return STATUS_MORE_PROCESSING_REQUIRED; - - UNREFERENCED_PARAMETER(DeviceObject); -} - - -/* - * KsDisconnectHelper - * the routine to be executed in the WorkItem procedure - * this routine is to disconnect a tdi connection - * - * Arguments: - * Workitem: the context transferred to the workitem - * - * Return Value: - * N/A - * - * Notes: - * tconn is already referred in abort_connecton ... - */ - -VOID -KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem) -{ - ksock_tconn_t * tconn = WorkItem->tconn; - - DbgPrint("KsDisconnectHelper: disconnecting tconn=%p\n", tconn); - ks_disconnect_tconn(tconn, WorkItem->Flags); - - KeSetEvent(&(WorkItem->Event), 0, FALSE); - - spin_lock(&(tconn->kstc_lock)); - cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); -} - - -/* - * KsDisconnectEventHandler - * Disconnect event handler event handler, called by the underlying TDI transport - * in response to an incoming disconnection notification from a remote node. - * - * Arguments: - * ConnectionContext: tdi connnection object - * DisconnectFlags: specifies the nature of the disconnection - * ...... 
- * - * Return Value: - * Nt kernel status code - * - * Notes: - * N/A - */ - - -NTSTATUS -KsDisconnectEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN LONG DisconnectDataLength, - IN PVOID DisconnectData, - IN LONG DisconnectInformationLength, - IN PVOID DisconnectInformation, - IN ULONG DisconnectFlags - ) -{ - ksock_tconn_t * tconn; - NTSTATUS Status; - PKS_DISCONNECT_WORKITEM WorkItem; - - tconn = (ksock_tconn_t *)ConnectionContext; - - KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n", - KeGetCurrentIrql() )); - - KsPrint((2, "tconn = %x DisconnectFlags= %xh\n", - tconn, DisconnectFlags)); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - WorkItem = &(tconn->kstc_disconnect); - - if (tconn->kstc_state != ksts_connected) { - - Status = STATUS_SUCCESS; - - } else { - - if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) { - - Status = STATUS_REMOTE_DISCONNECT; - - } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) { - - Status = STATUS_GRACEFUL_DISCONNECT; - } - - if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { - - ks_get_tconn(tconn); - - WorkItem->Flags = DisconnectFlags; - WorkItem->tconn = tconn; - - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - - /* queue the workitem to call */ - ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue); - } - } - - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (Status); -} - -NTSTATUS -KsTcpReceiveCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ) -{ - NTSTATUS Status = Irp->IoStatus.Status; - - if (NT_SUCCESS(Status)) { - - ksock_tconn_t *tconn = Context->tconn; - - PKS_TSDU_DAT KsTsduDat = Context->CompletionContext; - PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext; - - KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n", - Context->KsTsduMgr->TotalBytes )); - - spin_lock(&(tconn->kstc_lock)); - - if (TSDU_TYPE_DAT == 
KsTsduDat->TsduType) { - if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { - cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); - } else { - cfs_enter_debugger(); - } - } else { - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { - cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); - } else { - cfs_enter_debugger(); - } - } - - spin_unlock(&(tconn->kstc_lock)); - - /* wake up the thread waiting for the completion of this Irp */ - KeSetEvent(Context->Event, 0, FALSE); - - /* re-active the ks connection and wake up the scheduler */ - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - Context->KsTsduMgr->TotalBytes ); - } - - } else { - - /* un-expected errors occur, we must abort the connection */ - ks_abort_tconn(Context->tconn); - } - - if (Context) { - - /* Freeing the Context structure... */ - ExFreePool(Context); - Context = NULL; - } - - - /* free the Irp */ - if (Irp) { - IoFreeIrp(Irp); - } - - return (Status); -} - - -/* - * KsTcpCompletionRoutine - * the Irp completion routine for TdiBuildSend and TdiBuildReceive ... - * We need call the use's own CompletionRoutine if specified. Or - * it's a synchronous case, we need signal the event. - * - * Arguments: - * DeviceObject: the device object of the transport - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsTcpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - if (Context) { - - PKS_TCP_COMPLETION_CONTEXT CompletionContext = NULL; - ksock_tconn_t * tconn = NULL; - - CompletionContext = (PKS_TCP_COMPLETION_CONTEXT) Context; - tconn = CompletionContext->tconn; - - /* release the chained mdl */ - KsReleaseMdl(Irp->MdlAddress, FALSE); - Irp->MdlAddress = NULL; - - if (CompletionContext->CompletionRoutine) { - - if ( CompletionContext->bCounted && - InterlockedDecrement(&CompletionContext->ReferCount) != 0 ) { - goto errorout; - } - - // - // Giving control to user specified CompletionRoutine ... - // - - CompletionContext->CompletionRoutine( - Irp, - CompletionContext - ); - - } else { - - // - // Signaling the Event ... - // - - KeSetEvent(CompletionContext->Event, 0, FALSE); - } - - /* drop the reference count of the tconn object */ - ks_put_tconn(tconn); - - } else { - - cfs_enter_debugger(); - } - -errorout: - - return STATUS_MORE_PROCESSING_REQUIRED; -} - -/* - * KsTcpSendCompletionRoutine - * the user specified Irp completion routine for asynchronous - * data transmission requests. - * - * It will do th cleanup job of the ksock_tx_t and wake up the - * ks scheduler thread - * - * Arguments: - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsTcpSendCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ) -{ - NTSTATUS Status = Irp->IoStatus.Status; - ULONG rc = Irp->IoStatus.Information; - ksock_tconn_t * tconn = Context->tconn; - PKS_TSDUMGR KsTsduMgr = Context->KsTsduMgr; - - ENTRY; - - LASSERT(tconn) ; - - if (NT_SUCCESS(Status)) { - - if (Context->bCounted) { - PVOID tx = Context->CompletionContext; - - ASSERT(tconn->kstc_update_tx != NULL); - - /* update the tx, rebasing the kiov or iov pointers */ - tx = tconn->kstc_update_tx(tconn, tx, rc); - - /* update the KsTsudMgr total bytes */ - spin_lock(&tconn->kstc_lock); - KsTsduMgr->TotalBytes -= rc; - spin_unlock(&tconn->kstc_lock); - - /* - * now it's time to re-queue the conns into the - * scheduler queue and wake the scheduler thread. - */ - - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, TRUE, tx, 0); - } - - } else { - - PKS_TSDU KsTsdu = Context->CompletionContext; - PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext2; - PKS_TSDU_DAT KsTsduDat = Context->CompletionContext2; - - spin_lock(&tconn->kstc_lock); - /* This is bufferred sending ... */ - ASSERT(KsTsduBuf->StartOffset == 0); - - if (KsTsduBuf->DataLength > Irp->IoStatus.Information) { - /* not fully sent .... 
we have to abort the connection */ - spin_unlock(&tconn->kstc_lock); - ks_abort_tconn(tconn); - goto errorout; - } - - if (KsTsduBuf->TsduType == TSDU_TYPE_BUF) { - /* free the buffer */ - ExFreePool(KsTsduBuf->UserBuffer); - KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength; - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - } else if (KsTsduDat->TsduType == TSDU_TYPE_DAT) { - KsTsduMgr->TotalBytes -= KsTsduDat->DataLength; - KsTsdu->StartOffset += KsTsduDat->TotalLength; - } else { - cfs_enter_debugger(); /* shoult not get here */ - } - - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { - - list_del(&KsTsdu->Link); - KsTsduMgr->NumOfTsdu--; - KsPutKsTsdu(KsTsdu); - } - - spin_unlock(&tconn->kstc_lock); - } - - } else { - - /* cfs_enter_debugger(); */ - - /* - * for the case that the transmission is ussuccessful, - * we need abort the tdi connection, but not destroy it. - * the socknal conn will drop the refer count, then the - * tdi connection will be freed. - */ - - ks_abort_tconn(tconn); - } - -errorout: - - /* freeing the Context structure... */ - - if (Context) { - ExFreePool(Context); - Context = NULL; - } - - /* it's our duty to free the Irp. 
*/ - - if (Irp) { - IoFreeIrp(Irp); - Irp = NULL; - } - - EXIT; - - return Status; -} - -/* - * Normal receive event handler - * - * It will move data from system Tsdu to our TsduList - */ - -NTSTATUS -KsTcpReceiveEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ) -{ - NTSTATUS Status; - - ksock_tconn_t * tconn; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - - BOOLEAN bIsExpedited; - BOOLEAN bIsCompleteTsdu; - - BOOLEAN bNewTsdu = FALSE; - BOOLEAN bNewBuff = FALSE; - - PCHAR Buffer = NULL; - - PIRP Irp = NULL; - PMDL Mdl = NULL; - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - - ULONG BytesReceived = 0; - - PKS_TCP_COMPLETION_CONTEXT context = NULL; - - - tconn = (ksock_tconn_t *) ConnectionContext; - - ks_get_tconn(tconn); - - /* check whether the whole body of payload is received or not */ - if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) && - (BytesIndicated == BytesAvailable) ) { - bIsCompleteTsdu = TRUE; - } else { - bIsCompleteTsdu = FALSE; - } - - bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); - - KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable)); - KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited )); - - spin_lock(&(tconn->kstc_lock)); - - /* check whether we are conntected or not listener ¡Â*/ - if ( !((tconn->kstc_state == ksts_connected) && - (tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child))) { - - *BytesTaken = BytesIndicated; - - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (STATUS_SUCCESS); - } - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - 
LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - /* if the Tsdu is even larger than the biggest Tsdu, we have - to allocate new buffer and use TSDU_TYOE_BUF to store it */ - - if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ks_data.ksnd_tsdu_size - - KS_DWORD_ALIGN(sizeof(KS_TSDU))) { - bNewBuff = TRUE; - } - - /* retrieve the latest Tsdu buffer form TsduMgr - list if the list is not empty. */ - - if (list_empty(&(KsTsduMgr->TsduList))) { - - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; - - } else { - - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - - /* if this Tsdu does not contain enough space, we need - allocate a new Tsdu queue. */ - - if (bNewBuff) { - if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) > - KsTsdu->TotalLength ) { - KsTsdu = NULL; - } - } else { - if ( KS_TSDU_STRU_SIZE(BytesAvailable) > - KsTsdu->TotalLength - KsTsdu->LastOffset ) { - KsTsdu = NULL; - } - } - } - - /* allocating the buffer for TSDU_TYPE_BUF */ - if (bNewBuff) { - Buffer = ExAllocatePool(NonPagedPool, BytesAvailable); - if (NULL == Buffer) { - /* there's no enough memory for us. We just try to - receive maximum bytes with a new Tsdu */ - bNewBuff = FALSE; - KsTsdu = NULL; - } - } - - /* allocate a new Tsdu in case we are not statisfied. 
*/ - - if (NULL == KsTsdu) { - - KsTsdu = KsAllocateKsTsdu(); - - if (NULL == KsTsdu) { - goto errorout; - } else { - bNewTsdu = TRUE; - } - } - - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - if (bNewBuff) { - - /* setup up the KS_TSDU_BUF record */ - - KsTsduBuf->TsduType = TSDU_TYPE_BUF; - KsTsduBuf->TsduFlags = 0; - KsTsduBuf->StartOffset = 0; - KsTsduBuf->UserBuffer = Buffer; - KsTsduBuf->DataLength = BytesReceived = BytesAvailable; - - KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); - - } else { - - /* setup the KS_TSDU_DATA to contain all the messages */ - - KsTsduDat->TsduType = TSDU_TYPE_DAT; - KsTsduDat->TsduFlags = 0; - - if ( KsTsdu->TotalLength - KsTsdu->LastOffset >= - KS_TSDU_STRU_SIZE(BytesAvailable) ) { - BytesReceived = BytesAvailable; - } else { - BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset - - FIELD_OFFSET(KS_TSDU_DAT, Data); - BytesReceived &= (~((ULONG)3)); - } - KsTsduDat->DataLength = BytesReceived; - KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE(BytesReceived); - KsTsduDat->StartOffset = 0; - - Buffer = &KsTsduDat->Data[0]; - - KsTsdu->LastOffset += KsTsduDat->TotalLength; - } - - KsTsduMgr->TotalBytes += BytesReceived; - - if (bIsCompleteTsdu) { - - /* It's a complete receive, we just move all - the data from system to our Tsdu */ - - RtlMoveMemory( - Buffer, - Tsdu, - BytesReceived - ); - - *BytesTaken = BytesReceived; - Status = STATUS_SUCCESS; - - if (bNewTsdu) { - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); - - /* re-active the ks connection and wake up the scheduler */ - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - KsTsduMgr->TotalBytes ); - } - - } else { - - /* there's still data in tdi internal queue, we need issue a new - Irp to receive all of them. 
first allocate the tcp context */ - - context = ExAllocatePoolWithTag( - NonPagedPool, - sizeof(KS_TCP_COMPLETION_CONTEXT), - 'cTsK'); - - if (!context) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - /* setup the context */ - RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT)); - - context->tconn = tconn; - context->CompletionRoutine = KsTcpReceiveCompletionRoutine; - context->CompletionContext = KsTsdu; - context->CompletionContext = bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat; - context->KsTsduMgr = KsTsduMgr; - context->Event = &(KsTsduMgr->Event); - - if (tconn->kstc_type == kstt_sender) { - FileObject = tconn->sender.kstc_info.FileObject; - } else { - FileObject = tconn->child.kstc_info.FileObject; - } - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - /* build new tdi Irp and setup it. */ - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - goto errorout; - } - - Status = KsLockUserBuffer( - Buffer, - FALSE, - BytesReceived, - IoModifyAccess, - &Mdl - ); - - if (!NT_SUCCESS(Status)) { - goto errorout; - } - - TdiBuildReceive( - Irp, - DeviceObject, - FileObject, - KsTcpCompletionRoutine, - context, - Mdl, - ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED), - BytesReceived - ); - - IoSetNextIrpStackLocation(Irp); - - /* return the newly built Irp to transport driver, - it will process it to receive all the data */ - - *IoRequestPacket = Irp; - *BytesTaken = 0; - - if (bNewTsdu) { - - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - if (bNewBuff) { - cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); - } else { - cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); - } - ks_get_tconn(tconn); - Status = STATUS_MORE_PROCESSING_REQUIRED; - } - - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (Status); - -errorout: - - spin_unlock(&(tconn->kstc_lock)); - - if (bNewTsdu && (KsTsdu != NULL)) { - KsFreeKsTsdu(KsTsdu); - } - - if 
(Mdl) { - KsReleaseMdl(Mdl, FALSE); - } - - if (Irp) { - IoFreeIrp(Irp); - } - - if (context) { - ExFreePool(context); - } - - ks_abort_tconn(tconn); - ks_put_tconn(tconn); - - *BytesTaken = BytesAvailable; - Status = STATUS_SUCCESS; - - return (Status); -} - -/* - * Expedited receive event handler - */ - -NTSTATUS -KsTcpReceiveExpeditedEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ) -{ - return KsTcpReceiveEventHandler( - TdiEventContext, - ConnectionContext, - ReceiveFlags | TDI_RECEIVE_EXPEDITED, - BytesIndicated, - BytesAvailable, - BytesTaken, - Tsdu, - IoRequestPacket - ); -} - - -/* - * Bulk receive event handler - * - * It will queue all the system Tsdus to our TsduList. - * Then later ks_recv_mdl will release them. - */ - -NTSTATUS -KsTcpChainedReceiveEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ) -{ - - NTSTATUS Status; - - ksock_tconn_t * tconn; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_MDL KsTsduMdl; - - BOOLEAN bIsExpedited; - BOOLEAN bNewTsdu = FALSE; - - tconn = (ksock_tconn_t *) ConnectionContext; - - bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); - - KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited)); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - /* check whether we are conntected or not listener ¡Â*/ - if ( !((tconn->kstc_state == ksts_connected) && - (tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child))) { - - 
spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (STATUS_SUCCESS); - } - - /* get the latest Tsdu buffer form TsduMgr list. - just set NULL if the list is empty. */ - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - if (list_empty(&(KsTsduMgr->TsduList))) { - - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; - - } else { - - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) { - KsTsdu = NULL; - } - } - - /* if there's no Tsdu or the free size is not enough for this - KS_TSDU_MDL structure. We need re-allocate a new Tsdu. */ - - if (NULL == KsTsdu) { - - KsTsdu = KsAllocateKsTsdu(); - - if (NULL == KsTsdu) { - goto errorout; - } else { - bNewTsdu = TRUE; - } - } - - /* just queue the KS_TSDU_MDL to the Tsdu buffer */ - - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - KsTsduMdl->TsduType = TSDU_TYPE_MDL; - KsTsduMdl->DataLength = ReceiveLength; - KsTsduMdl->StartOffset = StartingOffset; - KsTsduMdl->Mdl = Tsdu; - KsTsduMdl->Descriptor = TsduDescriptor; - - KsTsdu->LastOffset += sizeof(KS_TSDU_MDL); - KsTsduMgr->TotalBytes += ReceiveLength; - - KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n", - KsTsduMgr->TotalBytes )); - - Status = STATUS_PENDING; - - /* attach it to the TsduMgr list if the Tsdu is newly created. 
*/ - if (bNewTsdu) { - - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - spin_unlock(&(tconn->kstc_lock)); - - /* wake up the threads waiing in ks_recv_mdl */ - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); - - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - KsTsduMgr->TotalBytes ); - } - - ks_put_tconn(tconn); - - /* Return STATUS_PENDING to system because we are still - owning the MDL resources. ks_recv_mdl is expected - to free the MDL resources. */ - - return (Status); - -errorout: - - spin_unlock(&(tconn->kstc_lock)); - - if (bNewTsdu && (KsTsdu != NULL)) { - KsFreeKsTsdu(KsTsdu); - } - - /* abort the tdi connection */ - ks_abort_tconn(tconn); - ks_put_tconn(tconn); - - - Status = STATUS_SUCCESS; - - return (Status); -} - - -/* - * Expedited & Bulk receive event handler - */ - -NTSTATUS -KsTcpChainedReceiveExpeditedEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ) -{ - return KsTcpChainedReceiveEventHandler( - TdiEventContext, - ConnectionContext, - ReceiveFlags | TDI_RECEIVE_EXPEDITED, - ReceiveLength, - StartingOffset, - Tsdu, - TsduDescriptor ); -} - - -VOID -KsPrintProviderInfo( - PWSTR DeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ) -{ - KsPrint((2, "%ws ProviderInfo:\n", DeviceName)); - - KsPrint((2, " Version : 0x%4.4X\n", ProviderInfo->Version )); - KsPrint((2, " MaxSendSize : %d\n", ProviderInfo->MaxSendSize )); - KsPrint((2, " MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData )); - KsPrint((2, " MaxDatagramSize : %d\n", ProviderInfo->MaxDatagramSize )); - KsPrint((2, " ServiceFlags : 0x%8.8X\n", ProviderInfo->ServiceFlags )); - - if (ProviderInfo->ServiceFlags & 
TDI_SERVICE_CONNECTION_MODE) { - KsPrint((2, " CONNECTION_MODE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) { - KsPrint((2, " ORDERLY_RELEASE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) { - KsPrint((2, " CONNECTIONLESS_MODE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) { - KsPrint((2, " ERROR_FREE_DELIVERY\n")); - } - - if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) { - KsPrint((2, " SECURITY_LEVEL\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) { - KsPrint((2, " BROADCAST_SUPPORTED\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) { - KsPrint((2, " MULTICAST_SUPPORTED\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) { - KsPrint((2, " DELAYED_ACCEPTANCE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) { - KsPrint((2, " EXPEDITED_DATA\n")); - } - - if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) { - KsPrint((2, " INTERNAL_BUFFERING\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) { - KsPrint((2, " ROUTE_DIRECTED\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) { - KsPrint((2, " NO_ZERO_LENGTH\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) { - KsPrint((2, " POINT_TO_POINT\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) { - KsPrint((2, " MESSAGE_MODE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) { - KsPrint((2, " HALF_DUPLEX\n")); - } - - KsPrint((2, " MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData )); - KsPrint((2, " MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData )); - KsPrint((2, " NumberOfResources : %d\n", ProviderInfo->NumberOfResources )); -} - - -/* - * KsAllocateKsTsdu - * Reuse a Tsdu from the freelist or allocate a new Tsdu - * from the 
LookAsideList table or the NonPagedPool - * - * Arguments: - * N/A - * - * Return Value: - * PKS_Tsdu: the new Tsdu or NULL if it fails - * - * Notes: - * N/A - */ - -PKS_TSDU -KsAllocateKsTsdu() -{ - PKS_TSDU KsTsdu = NULL; - - spin_lock(&(ks_data.ksnd_tsdu_lock)); - - if (!list_empty (&(ks_data.ksnd_freetsdus))) { - - LASSERT(ks_data.ksnd_nfreetsdus > 0); - - KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link); - list_del(&(KsTsdu->Link)); - ks_data.ksnd_nfreetsdus--; - - } else { - - KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc( - ks_data.ksnd_tsdu_slab, 0); - } - - spin_unlock(&(ks_data.ksnd_tsdu_lock)); - - if (NULL != KsTsdu) { - KsInitializeKsTsdu(KsTsdu, ks_data.ksnd_tsdu_size); - } - - return (KsTsdu); -} - - -/* - * KsPutKsTsdu - * Move the Tsdu to the free tsdu list in ks_data. - * - * Arguments: - * KsTsdu: Tsdu to be moved. - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -VOID -KsPutKsTsdu( - PKS_TSDU KsTsdu - ) -{ - spin_lock(&(ks_data.ksnd_tsdu_lock)); - - list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus)); - ks_data.ksnd_nfreetsdus++; - - spin_unlock(&(ks_data.ksnd_tsdu_lock)); -} - - -/* - * KsFreeKsTsdu - * Release a Tsdu: uninitialize then free it. - * - * Arguments: - * KsTsdu: Tsdu to be freed. 
- * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -VOID -KsFreeKsTsdu( - PKS_TSDU KsTsdu - ) -{ - cfs_mem_cache_free( - ks_data.ksnd_tsdu_slab, - KsTsdu ); -} - - -/* - * KsInitializeKsTsdu - * Initialize the Tsdu buffer header - * - * Arguments: - * KsTsdu: the Tsdu to be initialized - * Length: the total length of the Tsdu - * - * Return Value: - * VOID - * - * NOTES: - * N/A - */ - -VOID -KsInitializeKsTsdu( - PKS_TSDU KsTsdu, - ULONG Length - ) -{ - RtlZeroMemory(KsTsdu, Length); - KsTsdu->Magic = KS_TSDU_MAGIC; - KsTsdu->TotalLength = Length; - KsTsdu->StartOffset = KsTsdu->LastOffset = - KS_DWORD_ALIGN(sizeof(KS_TSDU)); -} - - -/* - * KsInitializeKsTsduMgr - * Initialize the management structure of - * Tsdu buffers - * - * Arguments: - * TsduMgr: the TsduMgr to be initialized - * - * Return Value: - * VOID - * - * NOTES: - * N/A - */ - -VOID -KsInitializeKsTsduMgr( - PKS_TSDUMGR TsduMgr - ) -{ - KeInitializeEvent( - &(TsduMgr->Event), - NotificationEvent, - FALSE - ); - - CFS_INIT_LIST_HEAD( - &(TsduMgr->TsduList) - ); - - TsduMgr->NumOfTsdu = 0; - TsduMgr->TotalBytes = 0; -} - - -/* - * KsInitializeKsChain - * Initialize the China structure for receiving - * or transmitting - * - * Arguments: - * KsChain: the KsChain to be initialized - * - * Return Value: - * VOID - * - * NOTES: - * N/A - */ - -VOID -KsInitializeKsChain( - PKS_CHAIN KsChain - ) -{ - KsInitializeKsTsduMgr(&(KsChain->Normal)); - KsInitializeKsTsduMgr(&(KsChain->Expedited)); -} - - -/* - * KsCleanupTsduMgr - * Clean up all the Tsdus in the TsduMgr list - * - * Arguments: - * KsTsduMgr: the Tsdu list manager - * - * Return Value: - * NTSTATUS: nt status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsCleanupTsduMgr( - PKS_TSDUMGR KsTsduMgr - ) -{ - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_MDL KsTsduMdl; - - LASSERT(NULL != KsTsduMgr); - - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); - - while (!list_empty(&KsTsduMgr->TsduList)) { - - KsTsdu = 
list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { - - // - // KsTsdu is empty now, we need free it ... - // - - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; - - KsFreeKsTsdu(KsTsdu); - - } else { - - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - - KsTsdu->StartOffset += KsTsduDat->TotalLength; - - } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) { - - ASSERT(KsTsduBuf->UserBuffer != NULL); - - if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) { - ExFreePool(KsTsduBuf->UserBuffer); - } else { - cfs_enter_debugger(); - } - - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - - } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { - - // - // MDL Tsdu Unit ... - // - - TdiReturnChainedReceives( - &(KsTsduMdl->Descriptor), - 1 ); - - KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); - } - } - } - - return STATUS_SUCCESS; -} - - -/* - * KsCleanupKsChain - * Clean up the TsduMgrs of the KsChain - * - * Arguments: - * KsChain: the chain managing TsduMgr - * - * Return Value: - * NTSTATUS: nt status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsCleanupKsChain( - PKS_CHAIN KsChain - ) -{ - NTSTATUS Status; - - LASSERT(NULL != KsChain); - - Status = KsCleanupTsduMgr( - &(KsChain->Normal) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - Status = KsCleanupTsduMgr( - &(KsChain->Expedited) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - -errorout: - - return Status; -} - - -/* - * KsCleanupTsdu - * Clean up all the Tsdus of a tdi connected object - * - * Arguments: - * tconn: the tdi connection which is connected already. 
- * - * Return Value: - * Nt status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsCleanupTsdu( - ksock_tconn_t * tconn - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - - if (tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child ) { - - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - - Status = KsCleanupKsChain( - &(tconn->sender.kstc_recv) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - Status = KsCleanupKsChain( - &(tconn->sender.kstc_send) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - } else { - - Status = KsCleanupKsChain( - &(tconn->child.kstc_recv) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - Status = KsCleanupKsChain( - &(tconn->child.kstc_send) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - } - -errorout: - - return (Status); -} - - -/* - * KsCopyMdlChainToMdlChain - * Copy data from a [chained] Mdl to anther [chained] Mdl. - * Tdi library does not provide this function. We have to - * realize it ourselives. - * - * Arguments: - * SourceMdlChain: the source mdl - * SourceOffset: start offset of the source - * DestinationMdlChain: the dst mdl - * DestinationOffset: the offset where data are to be copied. 
- * BytesTobecopied: the expteced bytes to be copied - * BytesCopied: to store the really copied data length - * - * Return Value: - * NTSTATUS: STATUS_SUCCESS or other error code - * - * NOTES: - * The length of source mdl must be >= SourceOffset + BytesTobecopied - */ - -NTSTATUS -KsCopyMdlChainToMdlChain( - IN PMDL SourceMdlChain, - IN ULONG SourceOffset, - IN PMDL DestinationMdlChain, - IN ULONG DestinationOffset, - IN ULONG BytesTobecopied, - OUT PULONG BytesCopied - ) -{ - PMDL SrcMdl = SourceMdlChain; - PMDL DstMdl = DestinationMdlChain; - - PUCHAR SrcBuf = NULL; - PUCHAR DstBuf = NULL; - - ULONG dwBytes = 0; - - NTSTATUS Status = STATUS_SUCCESS; - - - while (dwBytes < BytesTobecopied) { - - ULONG Length = 0; - - while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) { - - SourceOffset -= MmGetMdlByteCount(SrcMdl); - - SrcMdl = SrcMdl->Next; - - if (NULL == SrcMdl) { - - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - } - - while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) { - - DestinationOffset -= MmGetMdlByteCount(DstMdl); - - DstMdl = DstMdl->Next; - - if (NULL == DstMdl) { - - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - } - - DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl); - - if ((NULL == DstBuf)) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - // - // Here we need skip the OVERFLOW case via RtlCopyMemory :-( - // - - if ( KsQueryMdlsSize(SrcMdl) - SourceOffset > - MmGetMdlByteCount(DstMdl) - DestinationOffset ) { - - Length = BytesTobecopied - dwBytes; - - if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) { - Length = KsQueryMdlsSize(SrcMdl) - SourceOffset; - } - - if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) { - Length = MmGetMdlByteCount(DstMdl) - DestinationOffset; - } - - SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl); - - if ((NULL == DstBuf)) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - RtlCopyMemory( - DstBuf + DestinationOffset, - SrcBuf + SourceOffset, - Length - ); - - 
} else { - - Status = TdiCopyMdlToBuffer( - SrcMdl, - SourceOffset, - DstBuf, - DestinationOffset, - MmGetMdlByteCount(DstMdl), - &Length - ); - - if (STATUS_BUFFER_OVERFLOW == Status) { - cfs_enter_debugger(); - } else if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - } - - SourceOffset += Length; - DestinationOffset += Length; - dwBytes += Length; - } - -errorout: - - if (NT_SUCCESS(Status)) { - *BytesCopied = dwBytes; - } else { - *BytesCopied = 0; - } - - return Status; -} - - - -/* - * KsQueryMdlSize - * Query the whole size of a MDL (may be chained) - * - * Arguments: - * Mdl: the Mdl to be queried - * - * Return Value: - * ULONG: the total size of the mdl - * - * NOTES: - * N/A - */ - -ULONG -KsQueryMdlsSize (PMDL Mdl) -{ - PMDL Next = Mdl; - ULONG Length = 0; - - - // - // Walking the MDL Chain ... - // - - while (Next) { - Length += MmGetMdlByteCount(Next); - Next = Next->Next; - } - - return (Length); -} - - -/* - * KsLockUserBuffer - * Allocate MDL for the buffer and lock the pages into - * nonpaged pool - * - * Arguments: - * UserBuffer: the user buffer to be locked - * Length: length in bytes of the buffer - * Operation: read or write access - * pMdl: the result of the created mdl - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * NOTES: - * N/A - */ - -NTSTATUS -KsLockUserBuffer ( - IN PVOID UserBuffer, - IN BOOLEAN bPaged, - IN ULONG Length, - IN LOCK_OPERATION Operation, - OUT PMDL * pMdl - ) -{ - NTSTATUS Status; - PMDL Mdl = NULL; - - LASSERT(UserBuffer != NULL); - - *pMdl = NULL; - - Mdl = IoAllocateMdl( - UserBuffer, - Length, - FALSE, - FALSE, - NULL - ); - - if (Mdl == NULL) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - __try { - - if (bPaged) { - MmProbeAndLockPages( - Mdl, - KernelMode, - Operation - ); - } else { - MmBuildMdlForNonPagedPool( - Mdl - ); - } - - Status = STATUS_SUCCESS; - - *pMdl = Mdl; - - } __except (EXCEPTION_EXECUTE_HANDLER) { - 
- IoFreeMdl(Mdl); - - Mdl = NULL; - - cfs_enter_debugger(); - - Status = STATUS_INVALID_USER_BUFFER; - } - } - - return Status; -} - -/* - * KsMapMdlBuffer - * Map the mdl into a buffer in kernel space - * - * Arguments: - * Mdl: the mdl to be mapped - * - * Return Value: - * PVOID: the buffer mapped or NULL in failure - * - * NOTES: - * N/A - */ - -PVOID -KsMapMdlBuffer (PMDL Mdl) -{ - LASSERT(Mdl != NULL); - - return MmGetSystemAddressForMdlSafe( - Mdl, - NormalPagePriority - ); -} - - -/* - * KsReleaseMdl - * Unlock all the pages in the mdl - * - * Arguments: - * Mdl: memory description list to be released - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -VOID -KsReleaseMdl (IN PMDL Mdl, - IN int Paged ) -{ - LASSERT(Mdl != NULL); - - while (Mdl) { - - PMDL Next; - - Next = Mdl->Next; - - if (Paged) { - MmUnlockPages(Mdl); - } - - IoFreeMdl(Mdl); - - Mdl = Next; - } -} - - -/* - * ks_lock_buffer - * allocate MDL for the user spepcified buffer and lock (paging-in) - * all the pages of the buffer into system memory - * - * Arguments: - * buffer: the user buffer to be locked - * length: length in bytes of the buffer - * access: read or write access - * mdl: the result of the created mdl - * - * Return Value: - * int: the ks error code: 0: success / -x: failture - * - * Notes: - * N/A - */ - -int -ks_lock_buffer ( - void * buffer, - int paged, - int length, - LOCK_OPERATION access, - ksock_mdl_t ** kmdl - ) -{ - NTSTATUS status; - - status = KsLockUserBuffer( - buffer, - paged !=0, - length, - access, - kmdl - ); - - return cfs_error_code(status); -} - - -/* - * ks_map_mdl - * Map the mdl pages into kernel space - * - * Arguments: - * mdl: the mdl to be mapped - * - * Return Value: - * void *: the buffer mapped or NULL in failure - * - * Notes: - * N/A - */ - -void * -ks_map_mdl (ksock_mdl_t * mdl) -{ - LASSERT(mdl != NULL); - - return KsMapMdlBuffer(mdl); -} - -/* - * ks_release_mdl - * Unlock all the pages in the mdl and release the mdl - * - * 
Arguments: - * mdl: memory description list to be released - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_release_mdl (ksock_mdl_t *mdl, int paged) -{ - LASSERT(mdl != NULL); - - KsReleaseMdl(mdl, paged); -} - - -/* - * ks_create_tconn - * allocate a new tconn structure from the SLAB cache or - * NonPaged sysetm pool - * - * Arguments: - * N/A - * - * Return Value: - * ksock_tconn_t *: the address of tconn or NULL if it fails - * - * NOTES: - * N/A - */ - -ksock_tconn_t * -ks_create_tconn() -{ - ksock_tconn_t * tconn = NULL; - - /* allocate ksoc_tconn_t from the slab cache memory */ - - tconn = (ksock_tconn_t *)cfs_mem_cache_alloc( - ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO); - - if (tconn) { - - /* zero tconn elements */ - memset(tconn, 0, sizeof(ksock_tconn_t)); - - /* initialize the tconn ... */ - tconn->kstc_magic = KS_TCONN_MAGIC; - - ExInitializeWorkItem( - &(tconn->kstc_disconnect.WorkItem), - KsDisconnectHelper, - &(tconn->kstc_disconnect) - ); - - KeInitializeEvent( - &(tconn->kstc_disconnect.Event), - SynchronizationEvent, - FALSE ); - - ExInitializeWorkItem( - &(tconn->kstc_destroy), - ks_destroy_tconn, - tconn - ); - - spin_lock_init(&(tconn->kstc_lock)); - - ks_get_tconn(tconn); - - spin_lock(&(ks_data.ksnd_tconn_lock)); - - /* attach it into global list in ks_data */ - - list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns)); - ks_data.ksnd_ntconns++; - spin_unlock(&(ks_data.ksnd_tconn_lock)); - - tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000; - } - - return (tconn); -} - - -/* - * ks_free_tconn - * free the tconn structure to the SLAB cache or NonPaged - * sysetm pool - * - * Arguments: - * tconn: the tcon is to be freed - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_free_tconn(ksock_tconn_t * tconn) -{ - LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0); - - spin_lock(&(ks_data.ksnd_tconn_lock)); - - /* remove it from the global list */ - list_del(&tconn->kstc_list); - ks_data.ksnd_ntconns--; - - /* 
if this is the last tconn, it would be safe for - ks_tdi_fini_data to quit ... */ - if (ks_data.ksnd_ntconns == 0) { - cfs_wake_event(&ks_data.ksnd_tconn_exit); - } - spin_unlock(&(ks_data.ksnd_tconn_lock)); - - /* free the structure memory */ - cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn); -} - - -/* - * ks_init_listener - * Initialize the tconn as a listener (daemon) - * - * Arguments: - * tconn: the listener tconn - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_init_listener( - ksock_tconn_t * tconn - ) -{ - /* preparation: intialize the tconn members */ - - tconn->kstc_type = kstt_listener; - - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - - CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list)); - CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list)); - - cfs_init_event( &(tconn->listener.kstc_accept_event), - TRUE, - FALSE ); - - cfs_init_event( &(tconn->listener.kstc_destroy_event), - TRUE, - FALSE ); - - tconn->kstc_state = ksts_inited; -} - - -/* - * ks_init_sender - * Initialize the tconn as a sender - * - * Arguments: - * tconn: the sender tconn - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_init_sender( - ksock_tconn_t * tconn - ) -{ - tconn->kstc_type = kstt_sender; - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - - KsInitializeKsChain(&(tconn->sender.kstc_recv)); - KsInitializeKsChain(&(tconn->sender.kstc_send)); - - tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - - tconn->kstc_state = ksts_inited; -} - -/* - * ks_init_child - * Initialize the tconn as a child - * - * Arguments: - * tconn: the child tconn - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_init_child( - ksock_tconn_t * tconn - ) -{ - tconn->kstc_type = kstt_child; - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - - KsInitializeKsChain(&(tconn->child.kstc_recv)); - KsInitializeKsChain(&(tconn->child.kstc_send)); - 
- tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - - tconn->kstc_state = ksts_inited; -} - -/* - * ks_get_tconn - * increase the reference count of the tconn with 1 - * - * Arguments: - * tconn: the tdi connection to be referred - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_get_tconn( - ksock_tconn_t * tconn - ) -{ - atomic_inc(&(tconn->kstc_refcount)); -} - -/* - * ks_put_tconn - * decrease the reference count of the tconn and destroy - * it if the refercount becomes 0. - * - * Arguments: - * tconn: the tdi connection to be dereferred - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_put_tconn( - ksock_tconn_t *tconn - ) -{ - if (atomic_dec_and_test(&(tconn->kstc_refcount))) { - - spin_lock(&(tconn->kstc_lock)); - - if ( ( tconn->kstc_type == kstt_child || - tconn->kstc_type == kstt_sender ) && - ( tconn->kstc_state == ksts_connected ) ) { - - spin_unlock(&(tconn->kstc_lock)); - - ks_abort_tconn(tconn); - - } else { - - if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) { - cfs_enter_debugger(); - } else { - ExQueueWorkItem( - &(tconn->kstc_destroy), - DelayedWorkQueue - ); - - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY); - } - - spin_unlock(&(tconn->kstc_lock)); - } - } -} - -/* - * ks_destroy_tconn - * cleanup the tdi connection and free it - * - * Arguments: - * tconn: the tdi connection to be cleaned. 
- * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_destroy_tconn( - ksock_tconn_t * tconn - ) -{ - LASSERT(tconn->kstc_refcount.counter == 0); - - if (tconn->kstc_type == kstt_listener) { - - ks_reset_handlers(tconn); - - /* for listener, we just need to close the address object */ - KsCloseAddress( - tconn->kstc_addr.Handle, - tconn->kstc_addr.FileObject - ); - - tconn->kstc_state = ksts_inited; - - } else if (tconn->kstc_type == kstt_child) { - - /* for child tdi conections */ - - /* disassociate the relation between it's connection object - and the address object */ - - if (tconn->kstc_state == ksts_associated) { - KsDisassociateAddress( - tconn->child.kstc_info.FileObject - ); - } - - /* release the connection object */ - - KsCloseConnection( - tconn->child.kstc_info.Handle, - tconn->child.kstc_info.FileObject - ); - - /* release it's refer of it's parent's address object */ - KsCloseAddress( - NULL, - tconn->kstc_addr.FileObject - ); - - spin_lock(&tconn->child.kstc_parent->kstc_lock); - spin_lock(&tconn->kstc_lock); - - tconn->kstc_state = ksts_inited; - - /* remove it frome it's parent's queues */ - - if (tconn->child.kstc_queued) { - - list_del(&(tconn->child.kstc_link)); - - if (tconn->child.kstc_queueno) { - - LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0); - tconn->child.kstc_parent->listener.kstc_accepted.num -= 1; - - } else { - - LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0); - tconn->child.kstc_parent->listener.kstc_listening.num -= 1; - } - - tconn->child.kstc_queued = FALSE; - } - - spin_unlock(&tconn->kstc_lock); - spin_unlock(&tconn->child.kstc_parent->kstc_lock); - - /* drop the reference of the parent tconn */ - ks_put_tconn(tconn->child.kstc_parent); - - } else if (tconn->kstc_type == kstt_sender) { - - ks_reset_handlers(tconn); - - /* release the connection object */ - - KsCloseConnection( - tconn->sender.kstc_info.Handle, - tconn->sender.kstc_info.FileObject - ); - - /* release it's 
refer of it's parent's address object */ - KsCloseAddress( - tconn->kstc_addr.Handle, - tconn->kstc_addr.FileObject - ); - - tconn->kstc_state = ksts_inited; - - } else { - cfs_enter_debugger(); - } - - /* free the tconn structure ... */ - - ks_free_tconn(tconn); -} - -int -ks_query_data( - ksock_tconn_t * tconn, - size_t * size, - int bIsExpedited ) -{ - int rc = 0; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - - *size = 0; - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - if ( tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child) { - rc = -EINVAL; - spin_unlock(&(tconn->kstc_lock)); - goto errorout; - } - - if (tconn->kstc_state != ksts_connected) { - rc = -ENOTCONN; - spin_unlock(&(tconn->kstc_lock)); - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - *size = KsTsduMgr->TotalBytes; - spin_unlock(&(tconn->kstc_lock)); - -errorout: - - ks_put_tconn(tconn); - - return (rc); -} - -/* - * ks_get_tcp_option - * Query the the options of the tcp stream connnection - * - * Arguments: - * tconn: the tdi connection - * ID: option id - * OptionValue: buffer to store the option value - * Length: the length of the value, to be returned - * - * Return Value: - * int: ks return code - * - * NOTES: - * N/A - */ - -int -ks_get_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - PULONG Length - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - IO_STATUS_BLOCK IoStatus; - - TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; - - PIRP Irp = NULL; - PIO_STACK_LOCATION IrpSp = NULL; - - KEVENT Event; - - /* make sure the tdi connection is connected ? 
*/ - - ks_get_tconn(tconn); - - if (tconn->kstc_state != ksts_connected) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - LASSERT(tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); - - if (tconn->kstc_type == kstt_sender) { - ConnectionObject = tconn->sender.kstc_info.FileObject; - } else { - ConnectionObject = tconn->child.kstc_info.FileObject; - } - - QueryInfoEx.ID.toi_id = ID; - QueryInfoEx.ID.toi_type = INFO_TYPE_CONNECTION; - QueryInfoEx.ID.toi_class = INFO_CLASS_PROTOCOL; - QueryInfoEx.ID.toi_entity.tei_entity = CO_TL_ENTITY; - QueryInfoEx.ID.toi_entity.tei_instance = 0; - - RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE); - - KeInitializeEvent(&Event, NotificationEvent, FALSE); - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - Irp = IoBuildDeviceIoControlRequest( - IOCTL_TCP_QUERY_INFORMATION_EX, - DeviceObject, - &QueryInfoEx, - sizeof(TCP_REQUEST_QUERY_INFORMATION_EX), - OptionValue, - *Length, - FALSE, - &Event, - &IoStatus - ); - - if (Irp == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp = IoGetNextIrpStackLocation(Irp); - - if (IrpSp == NULL) { - - IoFreeIrp(Irp); - Irp = NULL; - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp->FileObject = ConnectionObject; - IrpSp->DeviceObject = DeviceObject; - - Status = IoCallDriver(DeviceObject, Irp); - - if (Status == STATUS_PENDING) { - - KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - Status = IoStatus.Status; - } - - - if (NT_SUCCESS(Status)) { - *Length = IoStatus.Information; - } else { - cfs_enter_debugger(); - memset(OptionValue, 0, *Length); - Status = STATUS_SUCCESS; - } - -errorout: - - ks_put_tconn(tconn); - - return cfs_error_code(Status); -} - -/* - * ks_set_tcp_option - * Set the the options for the tcp stream connnection - * - * Arguments: - * tconn: the tdi connection - * ID: option id - * OptionValue: buffer containing the new option value - * 
Length: the length of the value - * - * Return Value: - * int: ks return code - * - * NOTES: - * N/A - */ - -NTSTATUS -ks_set_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - ULONG Length - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - IO_STATUS_BLOCK IoStatus; - - ULONG SetInfoExLength; - PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; - - PIRP Irp = NULL; - PIO_STACK_LOCATION IrpSp = NULL; - - PKEVENT Event; - - /* make sure the tdi connection is connected ? */ - - ks_get_tconn(tconn); - - if (tconn->kstc_state != ksts_connected) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - LASSERT(tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); - - if (tconn->kstc_type == kstt_sender) { - ConnectionObject = tconn->sender.kstc_info.FileObject; - } else { - ConnectionObject = tconn->child.kstc_info.FileObject; - } - - SetInfoExLength = sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT); - - SetInfoEx = ExAllocatePoolWithTag( - NonPagedPool, - SetInfoExLength, - 'TSSK' - ); - - if (SetInfoEx == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - SetInfoEx->ID.toi_id = ID; - - SetInfoEx->ID.toi_type = INFO_TYPE_CONNECTION; - SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL; - SetInfoEx->ID.toi_entity.tei_entity = CO_TL_ENTITY; - SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE; - - SetInfoEx->BufferSize = Length; - RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length); - - Event = (PKEVENT)(&(SetInfoEx->Buffer[Length])); - KeInitializeEvent(Event, NotificationEvent, FALSE); - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - Irp = IoBuildDeviceIoControlRequest( - IOCTL_TCP_SET_INFORMATION_EX, - DeviceObject, - SetInfoEx, - SetInfoExLength, - NULL, - 0, - FALSE, - Event, - &IoStatus - ); - - if (Irp == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp = 
IoGetNextIrpStackLocation(Irp); - - if (IrpSp == NULL) { - IoFreeIrp(Irp); - Irp = NULL; - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp->FileObject = ConnectionObject; - IrpSp->DeviceObject = DeviceObject; - - Status = IoCallDriver(DeviceObject, Irp); - - if (Status == STATUS_PENDING) { - - KeWaitForSingleObject( - Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - Status = IoStatus.Status; - } - -errorout: - - if (SetInfoEx) { - ExFreePool(SetInfoEx); - } - - if (!NT_SUCCESS(Status)) { - printk("ks_set_tcp_option: error setup tcp option: ID (%d), Status = %xh\n", - ID, Status); - Status = STATUS_SUCCESS; - } - - ks_put_tconn(tconn); - - return cfs_error_code(Status); -} - -/* - * ks_bind_tconn - * bind the tdi connection object with an address - * - * Arguments: - * tconn: tconn to be bound - * parent: the parent tconn object - * ipaddr: the ip address - * port: the port number - * - * Return Value: - * int: 0 for success or ks error codes. - * - * NOTES: - * N/A - */ - -int -ks_bind_tconn ( - ksock_tconn_t * tconn, - ksock_tconn_t * parent, - ulong_ptr addr, - unsigned short port - ) -{ - NTSTATUS status; - int rc = 0; - - ksock_tdi_addr_t taddr; - - memset(&taddr, 0, sizeof(ksock_tdi_addr_t)); - - if (tconn->kstc_state != ksts_inited) { - - status = STATUS_INVALID_PARAMETER; - rc = cfs_error_code(status); - - goto errorout; - - } else if (tconn->kstc_type == kstt_child) { - - if (NULL == parent) { - status = STATUS_INVALID_PARAMETER; - rc = cfs_error_code(status); - - goto errorout; - } - - /* refer it's parent's address object */ - - taddr = parent->kstc_addr; - ObReferenceObject(taddr.FileObject); - - ks_get_tconn(parent); - - } else { - - PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi); - ULONG AddrLen = 0; - - /* intialize the tdi address*/ - - TdiAddress->TAAddressCount = 1; - TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; - TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; - - 
((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); - - memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); - - - /* open the transport address object */ - - AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + - TDI_ADDRESS_LENGTH_IP; - - status = KsOpenAddress( - &(tconn->kstc_dev), - &(taddr.Tdi), - AddrLen, - &(taddr.Handle), - &(taddr.FileObject) - ); - - if (!NT_SUCCESS(status)) { - - KsPrint((0, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n", - addr, port, status )); - rc = cfs_error_code(status); - goto errorout; - } - } - - if (tconn->kstc_type == kstt_child) { - tconn->child.kstc_parent = parent; - } - - tconn->kstc_state = ksts_bind; - tconn->kstc_addr = taddr; - -errorout: - - return (rc); -} - -/* - * ks_build_tconn - * build tcp/streaming connection to remote peer - * - * Arguments: - * tconn: tconn to be connected to the peer - * addr: the peer's ip address - * port: the peer's port number - * - * Return Value: - * int: 0 for success or ks error codes. 
- * - * Notes: - * N/A - */ - -int -ks_build_tconn( - ksock_tconn_t * tconn, - ulong_ptr addr, - unsigned short port - ) -{ - int rc = 0; - NTSTATUS status = STATUS_SUCCESS; - - - PFILE_OBJECT ConnectionObject = NULL; - PDEVICE_OBJECT DeviceObject = NULL; - - PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; - ULONG AddrLength; - - PIRP Irp = NULL; - - LASSERT(tconn->kstc_type == kstt_sender); - LASSERT(tconn->kstc_state == ksts_bind); - - ks_get_tconn(tconn); - - { - /* set the event callbacks */ - rc = ks_set_handlers(tconn); - - if (rc < 0) { - cfs_enter_debugger(); - goto errorout; - } - } - - /* create the connection file handle / object */ - status = KsOpenConnection( - &(tconn->kstc_dev), - (CONNECTION_CONTEXT)tconn, - &(tconn->sender.kstc_info.Handle), - &(tconn->sender.kstc_info.FileObject) - ); - - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - /* associdate the the connection with the adress object of the tconn */ - - status = KsAssociateAddress( - tconn->kstc_addr.Handle, - tconn->sender.kstc_info.FileObject - ); - - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - tconn->kstc_state = ksts_associated; - - /* Allocating Connection Info Together with the Address */ - AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) - + TDI_ADDRESS_LENGTH_IP; - - ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( - NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK'); - - if (NULL == ConnectionInfo) { - - status = STATUS_INSUFFICIENT_RESOURCES; - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - /* Initializing ConnectionInfo ... 
*/ - { - PTRANSPORT_ADDRESS TdiAddress; - - /* ConnectionInfo settings */ - - ConnectionInfo->UserDataLength = 0; - ConnectionInfo->UserData = NULL; - ConnectionInfo->OptionsLength = 0; - ConnectionInfo->Options = NULL; - ConnectionInfo->RemoteAddressLength = AddrLength; - ConnectionInfo->RemoteAddress = ConnectionInfo + 1; - - - /* intialize the tdi address*/ - - TdiAddress = ConnectionInfo->RemoteAddress; - - TdiAddress->TAAddressCount = 1; - TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; - TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; - - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); - - memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); - } - - /* Now prepare to connect the remote peer ... */ - - ConnectionObject = tconn->sender.kstc_info.FileObject; - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - /* allocate a new Irp */ - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - status = STATUS_INSUFFICIENT_RESOURCES; - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - /* setup the Irp */ - - TdiBuildConnect( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - NULL, - ConnectionInfo, - NULL - ); - - - /* sumbit the Irp to the underlying transport driver */ - status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - NULL - ); - - spin_lock(&(tconn->kstc_lock)); - - if (NT_SUCCESS(status)) { - - /* Connected! the conneciton is built successfully. */ - - tconn->kstc_state = ksts_connected; - - tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo; - tconn->sender.kstc_info.Remote = ConnectionInfo->RemoteAddress; - - spin_unlock(&(tconn->kstc_lock)); - - } else { - - /* Not connected! Abort it ... 
*/ - - if (rc != 0) { - cfs_enter_debugger(); - } - - Irp = NULL; - rc = cfs_error_code(status); - - tconn->kstc_state = ksts_associated; - spin_unlock(&(tconn->kstc_lock)); - - /* disassocidate the connection and the address object, - after cleanup, it's safe to set the state to abort ... */ - - if ( NT_SUCCESS(KsDisassociateAddress( - tconn->sender.kstc_info.FileObject))) { - tconn->kstc_state = ksts_aborted; - } - - /* reset the event callbacks */ - rc = ks_reset_handlers(tconn); - - goto errorout; - } - -errorout: - - if (NT_SUCCESS(status)) { - - ks_query_local_ipaddr(tconn); - - } else { - - if (ConnectionInfo) { - ExFreePool(ConnectionInfo); - } - if (Irp) { - IoFreeIrp(Irp); - } - } - - ks_put_tconn(tconn); - - return (rc); -} - - -/* - * ks_disconnect_tconn - * disconnect the tconn from a connection - * - * Arguments: - * tconn: the tdi connecton object connected already - * flags: flags & options for disconnecting - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -int -ks_disconnect_tconn( - ksock_tconn_t * tconn, - ulong_ptr flags - ) -{ - NTSTATUS status = STATUS_SUCCESS; - - ksock_tconn_info_t * info; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; - - PIRP Irp = NULL; - - KEVENT Event; - - ks_get_tconn(tconn); - - /* make sure tt's connected already and it - must be a sender or a child ... */ - - LASSERT(tconn->kstc_state == ksts_connected); - LASSERT( tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); - - /* reset all the event handlers to NULL */ - - if (tconn->kstc_type != kstt_child) { - ks_reset_handlers (tconn); - } - - /* Disconnecting to the remote peer ... 
*/ - - if (tconn->kstc_type == kstt_sender) { - info = &(tconn->sender.kstc_info); - } else { - info = &(tconn->child.kstc_info); - } - - ConnectionObject = info->FileObject; - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - /* allocate an Irp and setup it */ - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - - KeInitializeEvent( - &Event, - SynchronizationEvent, - FALSE - ); - - TdiBuildDisconnect( - Irp, - DeviceObject, - ConnectionObject, - KsDisconectCompletionRoutine, - &Event, - NULL, - flags, - NULL, - NULL - ); - - /* issue the Irp to the underlying transport - driver to disconnect the connection */ - - status = IoCallDriver(DeviceObject, Irp); - - if (STATUS_PENDING == status) { - - status = KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - status = Irp->IoStatus.Status; - } - - KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n", - status, KsNtStatusToString(status))); - - IoFreeIrp(Irp); - - if (info->ConnectionInfo) { - - /* disassociate the association between connection/address objects */ - - status = KsDisassociateAddress(ConnectionObject); - - if (!NT_SUCCESS(status)) { - cfs_enter_debugger(); - } - - spin_lock(&(tconn->kstc_lock)); - - /* cleanup the tsdumgr Lists */ - KsCleanupTsdu (tconn); - - /* set the state of the tconn */ - if (NT_SUCCESS(status)) { - tconn->kstc_state = ksts_disconnected; - } else { - tconn->kstc_state = ksts_associated; - } - - /* free the connection info to system pool*/ - ExFreePool(info->ConnectionInfo); - info->ConnectionInfo = NULL; - info->Remote = NULL; - - spin_unlock(&(tconn->kstc_lock)); - } - - status = STATUS_SUCCESS; - -errorout: - - ks_put_tconn(tconn); - - return cfs_error_code(status); -} - - -/* - * ks_abort_tconn - * The connection is broken un-expectedly. We need do - * some cleanup. 
- * - * Arguments: - * tconn: the tdi connection - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_abort_tconn( - ksock_tconn_t * tconn - ) -{ - PKS_DISCONNECT_WORKITEM WorkItem = NULL; - - WorkItem = &(tconn->kstc_disconnect); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - if (tconn->kstc_state != ksts_connected) { - ks_put_tconn(tconn); - } else { - - if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { - - WorkItem->Flags = TDI_DISCONNECT_ABORT; - WorkItem->tconn = tconn; - - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - - ExQueueWorkItem( - &(WorkItem->WorkItem), - DelayedWorkQueue - ); - } - } - - spin_unlock(&(tconn->kstc_lock)); -} - - -/* - * ks_query_local_ipaddr - * query the local connection ip address - * - * Arguments: - * tconn: the tconn which is connected - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -int -ks_query_local_ipaddr( - ksock_tconn_t * tconn - ) -{ - PFILE_OBJECT FileObject = NULL; - NTSTATUS status; - - PTRANSPORT_ADDRESS TdiAddress; - ULONG AddressLength; - - if (tconn->kstc_type == kstt_sender) { - FileObject = tconn->sender.kstc_info.FileObject; - } else if (tconn->kstc_type == kstt_child) { - FileObject = tconn->child.kstc_info.FileObject; - } else { - status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - TdiAddress = &(tconn->kstc_addr.Tdi); - AddressLength = MAX_ADDRESS_LENGTH; - - status = KsQueryIpAddress(FileObject, TdiAddress, &AddressLength); - - if (NT_SUCCESS(status)) { - - KsPrint((0, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n", - ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr, - ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port )); - } else { - KsPrint((0, "KsQueryonnectionIpAddress: Failed to query the connection local ip address.\n")); - } - -errorout: - - return cfs_error_code(status); -} - -/* - * ks_send_mdl - * send MDL chain to the peer for a stream connection - * - * 
Arguments: - * tconn: tdi connection object - * tx: the transmit context - * mdl: the mdl chain containing the data - * len: length of the data - * flags: flags of the transmission - * - * Return Value: - * ks return code - * - * Notes: - * N/A - */ - -int -ks_send_mdl( - ksock_tconn_t * tconn, - void * tx, - ksock_mdl_t * mdl, - int len, - int flags - ) -{ - NTSTATUS Status; - int rc = 0; - ulong_ptr length; - ulong_ptr tflags; - ksock_tdi_tx_t * context; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_DAT KsTsduDat; - - BOOLEAN bNewTsdu = FALSE; /* newly allocated */ - BOOLEAN bNewBuff = FALSE; /* newly allocated */ - - BOOLEAN bBuffed; /* bufferred sending */ - - PUCHAR Buffer = NULL; - ksock_mdl_t * NewMdl = NULL; - - PIRP Irp = NULL; - PFILE_OBJECT ConnObject; - PDEVICE_OBJECT DeviceObject; - - BOOLEAN bIsNonBlock; - - ks_get_tconn(tconn); - - tflags = ks_tdi_send_flags(flags); - bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT); - - spin_lock(&tconn->kstc_lock); - - LASSERT( tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child ); - - if (tconn->kstc_state != ksts_connected) { - spin_unlock(&tconn->kstc_lock); - ks_put_tconn(tconn); - return -ENOTCONN; - } - - /* get the latest Tsdu buffer form TsduMgr list. - just set NULL if the list is empty. 
*/ - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_send); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_send); - } - - if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) { - bBuffed = TRUE; - } else { - bBuffed = FALSE; - } - - /* do the preparation work for bufferred sending */ - - if (bBuffed) { - - /* if the data is even larger than the biggest Tsdu, we have - to allocate new buffer and use TSDU_TYOE_BUF to store it */ - - if ( KS_TSDU_STRU_SIZE((ULONG)len) > ks_data.ksnd_tsdu_size - - KS_DWORD_ALIGN(sizeof(KS_TSDU))) { - bNewBuff = TRUE; - } - - if (list_empty(&(KsTsduMgr->TsduList))) { - - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; - - } else { - - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - - /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */ - if (bNewBuff) { - if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) { - KsTsdu = NULL; - } - } else { - if ( KS_TSDU_STRU_SIZE((ULONG)len) > - KsTsdu->TotalLength - KsTsdu->LastOffset ) { - KsTsdu = NULL; - } - } - } - - /* if there's no Tsdu or the free size is not enough for the - KS_TSDU_BUF or KS_TSDU_DAT. We need re-allocate a new Tsdu. 
*/ - - if (NULL == KsTsdu) { - - KsTsdu = KsAllocateKsTsdu(); - - if (NULL == KsTsdu) { - bBuffed = FALSE; - bNewBuff = FALSE; - } else { - bNewTsdu = TRUE; - } - } - - /* process the case that a new buffer is to be allocated from system memory */ - if (bNewBuff) { - - /* now allocating internal buffer to contain the payload */ - Buffer = ExAllocatePool(NonPagedPool, len); - - if (NULL == Buffer) { - bBuffed = FALSE; - } - } - } - - if (bBuffed) { - - if (bNewBuff) { - - /* queue a new KS_TSDU_BUF to the Tsdu buffer */ - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - KsTsduBuf->TsduFlags = 0; - KsTsduBuf->DataLength = (ULONG)len; - KsTsduBuf->StartOffset = 0; - KsTsduBuf->UserBuffer = Buffer; - } else { - /* queue a new KS_TSDU_BUF to the Tsdu buffer */ - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - KsTsduDat->TsduFlags = 0; - KsTsduDat->DataLength = (ULONG)len; - KsTsduDat->StartOffset = 0; - KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE((ULONG)len); - - Buffer = &KsTsduDat->Data[0]; - } - - /* now locking the Buffer and copy user payload into the buffer */ - ASSERT(Buffer != NULL); - - rc = ks_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl); - if (rc != 0) { - printk("ks_send_mdl: bufferred: error allocating mdl.\n"); - bBuffed = FALSE; - } else { - ULONG BytesCopied = 0; - TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied); - if (BytesCopied != (ULONG) len) { - bBuffed = FALSE; - } - } - - /* Do the finializing job if we succeed to to lock the buffer and move - user data. Or we need do cleaning up ... */ - if (bBuffed) { - - if (bNewBuff) { - KsTsduBuf->TsduType = TSDU_TYPE_BUF; - KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); - - } else { - KsTsduDat->TsduType = TSDU_TYPE_DAT; - KsTsdu->LastOffset += KsTsduDat->TotalLength; - } - - /* attach it to the TsduMgr list if the Tsdu is newly created. 
*/ - if (bNewTsdu) { - - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - } else { - - if (NewMdl) { - ks_release_mdl(NewMdl, FALSE); - NewMdl = NULL; - } - - if (bNewBuff) { - ExFreePool(Buffer); - Buffer = NULL; - bNewBuff = FALSE; - } - } - } - - /* update the TotalBytes being in sending */ - KsTsduMgr->TotalBytes += (ULONG)len; - - spin_unlock(&tconn->kstc_lock); - - /* cleanup the Tsdu if not successful */ - if (!bBuffed && bNewTsdu) { - KsPutKsTsdu(KsTsdu); - bNewTsdu = FALSE; - KsTsdu = NULL; - } - - /* we need allocate the ksock_tx_t structure from memory pool. */ - - context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0); - if (!context) { - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - /* intialize the TcpContext */ - - memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT)); - - context->tconn = tconn; - context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t)); - - KeInitializeEvent(context->Event, SynchronizationEvent, FALSE); - - if (bBuffed) { - - /* for bufferred transmission, we need set - the internal completion routine. */ - - context->CompletionRoutine = KsTcpSendCompletionRoutine; - context->KsTsduMgr = KsTsduMgr; - context->CompletionContext = KsTsdu; - context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat); - context->bCounted = FALSE; - - } else if (bIsNonBlock) { - - /* for non-blocking transmission, we need set - the internal completion routine too. 
*/ - - context->CompletionRoutine = KsTcpSendCompletionRoutine; - context->CompletionContext = tx; - context->KsTsduMgr = KsTsduMgr; - context->bCounted = TRUE; - context->ReferCount = 2; - } - - if (tconn->kstc_type == kstt_sender) { - ConnObject = tconn->sender.kstc_info.FileObject; - } else { - LASSERT(tconn->kstc_type == kstt_child); - ConnObject = tconn->child.kstc_info.FileObject; - } - - DeviceObject = IoGetRelatedDeviceObject(ConnObject); - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - length = KsQueryMdlsSize(mdl); - - LASSERT((ULONG)len <= length); - - ks_get_tconn(tconn); - - TdiBuildSend( - Irp, - DeviceObject, - ConnObject, - KsTcpCompletionRoutine, - context, - (bBuffed ? NewMdl : mdl), - (bBuffed ? (tflags | TDI_SEND_NON_BLOCKING) : tflags), - (ULONG)len; - ); - - Status = IoCallDriver(DeviceObject, Irp); - - if (bBuffed) { - ks_release_mdl(mdl, FALSE); - NewMdl = NULL; - } - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - rc = cfs_error_code(Status); - goto errorout; - } - - if (bBuffed) { - Status = STATUS_SUCCESS; - rc = len; - context = NULL; - } else { - if (bIsNonBlock) { - if (InterlockedDecrement(&context->ReferCount) == 0) { - Status = Irp->IoStatus.Status; - } else { - Status = STATUS_PENDING; - context = NULL; - } - } else { - if (STATUS_PENDING == Status) { - Status = KeWaitForSingleObject( - context->Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - if (NT_SUCCESS(Status)) { - Status = Irp->IoStatus.Status; - } - } - } - - if (Status == STATUS_SUCCESS) { - rc = (int)(Irp->IoStatus.Information); - - spin_lock(&tconn->kstc_lock); - KsTsduMgr->TotalBytes -= rc; - spin_unlock(&tconn->kstc_lock); - - } else { - rc = cfs_error_code(Status); - } - } - -errorout: - - if (bBuffed) { - - if (NewMdl) { - ks_release_mdl(NewMdl, FALSE); - NewMdl = NULL; - } - - if (bNewBuff) { - if 
(!NT_SUCCESS(Status)) { - ExFreePool(Buffer); - Buffer = NULL; - } - } - - } else { - - if (Status != STATUS_PENDING) { - - if (Irp) { - - /* Freeing the Irp ... */ - - IoFreeIrp(Irp); - Irp = NULL; - } - } - } - - if (!NT_SUCCESS(Status)) { - - spin_lock(&tconn->kstc_lock); - - KsTsduMgr->TotalBytes -= (ULONG)len; - - if (bBuffed) { - - /* attach it to the TsduMgr list if the Tsdu is newly created. */ - if (bNewTsdu) { - - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; - - KsPutKsTsdu(KsTsdu); - } else { - if (bNewBuff) { - if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) == - (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { - KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF); - KsTsduBuf->TsduType = 0; - } else { - cfs_enter_debugger(); - KsTsduBuf->StartOffset = KsTsduBuf->DataLength; - } - } else { - if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength == - (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { - KsTsdu->LastOffset -= KsTsduDat->TotalLength; - KsTsduDat->TsduType = 0; - } else { - cfs_enter_debugger(); - KsTsduDat->StartOffset = KsTsduDat->DataLength; - } - } - } - } - - spin_unlock(&tconn->kstc_lock); - } - - /* free the context if is not used at all */ - if (context) { - cfs_free(context); - } - - ks_put_tconn(tconn); - - return rc; -} - -/* - * ks_recv_mdl - * Receive data from the peer for a stream connection - * - * Arguments: - * tconn: tdi connection object - * mdl: the mdl chain to contain the incoming data - * len: length of the data - * flags: flags of the receiving - * - * Return Value: - * ks return code - * - * Notes: - * N/A - */ - -int -ks_recv_mdl( - ksock_tconn_t * tconn, - ksock_mdl_t * mdl, - int size, - int flags - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - int rc = 0; - - BOOLEAN bIsNonBlock; - BOOLEAN bIsExpedited; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_MDL KsTsduMdl; - - PUCHAR Buffer; - - ULONG BytesRecved = 0; - ULONG RecvedOnce; - - bIsNonBlock = 
cfs_is_flag_set(flags, MSG_DONTWAIT); - bIsExpedited = cfs_is_flag_set(flags, MSG_OOB); - - ks_get_tconn(tconn); - -Again: - - RecvedOnce = 0; - - spin_lock(&(tconn->kstc_lock)); - - if ( tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child) { - - rc = -EINVAL; - spin_unlock(&(tconn->kstc_lock)); - - goto errorout; - } - - if (tconn->kstc_state != ksts_connected) { - - rc = -ENOTCONN; - spin_unlock(&(tconn->kstc_lock)); - - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - -NextTsdu: - - if (list_empty(&(KsTsduMgr->TsduList))) { - - // - // It's a notification event. We need reset it to - // un-signaled state in case there no any tsdus. - // - - KeResetEvent(&(KsTsduMgr->Event)); - - } else { - - KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - /* remove the KsTsdu from TsduMgr list to release the lock */ - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; - - spin_unlock(&(tconn->kstc_lock)); - - while ((ULONG)size > BytesRecved) { - - ULONG BytesCopied = 0; - ULONG BytesToCopy = 0; - ULONG StartOffset = 0; - - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - - if ( TSDU_TYPE_DAT == KsTsduDat->TsduType || - TSDU_TYPE_BUF == KsTsduBuf->TsduType ) { - - - // - // Data Tsdu Unit ... 
- // - - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - - if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { - /* data is not ready yet*/ - KeResetEvent(&(KsTsduMgr->Event)); - printk("ks_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat); - break; - } - - Buffer = &KsTsduDat->Data[0]; - StartOffset = KsTsduDat->StartOffset; - if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) { - /* Recvmsg requst could be statisfied ... */ - BytesToCopy = size - BytesRecved; - } else { - BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset; - } - - } else { - - if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { - /* data is not ready yet*/ - KeResetEvent(&(KsTsduMgr->Event)); - DbgPrint("ks_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf); - break; - } - - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - Buffer = KsTsduBuf->UserBuffer; - StartOffset = KsTsduBuf->StartOffset; - - if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) { - /* Recvmsg requst could be statisfied ... 
*/ - BytesToCopy = size - BytesRecved; - } else { - BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset; - } - } - - if (BytesToCopy > 0) { - Status = TdiCopyBufferToMdl( - Buffer, - StartOffset, - BytesToCopy, - mdl, - BytesRecved, - &BytesCopied - ); - - if (NT_SUCCESS(Status)) { - - if (BytesToCopy != BytesCopied) { - cfs_enter_debugger(); - } - - BytesRecved += BytesCopied; - RecvedOnce += BytesCopied; - - } else { - - cfs_enter_debugger(); - - if (STATUS_BUFFER_OVERFLOW == Status) { - } - } - } - - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - - KsTsduDat->StartOffset += BytesCopied; - - if (KsTsduDat->StartOffset == KsTsduDat->DataLength) { - KsTsdu->StartOffset += KsTsduDat->TotalLength; - } - - } else { - - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - KsTsduBuf->StartOffset += BytesCopied; - if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) { - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - /* now we need release the buf to system pool */ - ExFreePool(KsTsduBuf->UserBuffer); - } - } - - } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { - - // - // MDL Tsdu Unit ... - // - - if (KsTsduMdl->DataLength > size - BytesRecved) { - - /* Recvmsg requst could be statisfied ... 
*/ - - BytesToCopy = size - BytesRecved; - - } else { - - BytesToCopy = KsTsduMdl->DataLength; - } - - Status = KsCopyMdlChainToMdlChain( - KsTsduMdl->Mdl, - KsTsduMdl->StartOffset, - mdl, - BytesRecved, - BytesToCopy, - &BytesCopied - ); - - if (NT_SUCCESS(Status)) { - - if (BytesToCopy != BytesCopied) { - cfs_enter_debugger(); - } - - KsTsduMdl->StartOffset += BytesCopied; - KsTsduMdl->DataLength -= BytesCopied; - - BytesRecved += BytesCopied; - RecvedOnce += BytesCopied; - } else { - cfs_enter_debugger(); - } - - if (0 == KsTsduMdl->DataLength) { - - // - // Call TdiReturnChainedReceives to release the Tsdu memory - // - - TdiReturnChainedReceives( - &(KsTsduMdl->Descriptor), - 1 ); - - KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); - } - - } else { - printk("ks_recv_mdl: unknown tsdu slot: slot = %x type = %x Start= %x\n", - KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength); - printk(" Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x", - KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength); - cfs_enter_debugger(); - } - - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { - - // - // KsTsdu is empty now, we need free it ... 
- // - - KsPutKsTsdu(KsTsdu); - KsTsdu = NULL; - - break; - } - } - - spin_lock(&(tconn->kstc_lock)); - - /* we need attach the KsTsdu to the list header */ - if (KsTsdu) { - KsTsduMgr->NumOfTsdu++; - list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - } else if ((ULONG)size > BytesRecved) { - goto NextTsdu; - } - } - - if (KsTsduMgr->TotalBytes < RecvedOnce) { - cfs_enter_debugger(); - KsTsduMgr->TotalBytes = 0; - } else { - KsTsduMgr->TotalBytes -= RecvedOnce; - } - - spin_unlock(&(tconn->kstc_lock)); - - if (NT_SUCCESS(Status)) { - - if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) { - - KeWaitForSingleObject( - &(KsTsduMgr->Event), - Executive, - KernelMode, - FALSE, - NULL - ); - - goto Again; - } - - if (bIsNonBlock && (BytesRecved == 0)) { - rc = -EAGAIN; - } else { - rc = BytesRecved; - } - } - -errorout: - - ks_put_tconn(tconn); - - if (rc > 0) { - KsPrint((1, "ks_recv_mdl: recvieving %d bytes ...\n", rc)); - } else { - KsPrint((0, "ks_recv_mdl: recvieving error code = %d Stauts = %xh ...\n", rc, Status)); - } - - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); - - return (rc); -} - - -/* - * ks_init_tdi_data - * initialize the global data in ksockal_data - * - * Arguments: - * N/A - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -int -ks_init_tdi_data() -{ - int rc = 0; - - /* initialize tconn related globals */ - RtlZeroMemory(&ks_data, sizeof(ks_data_t)); - - spin_lock_init(&ks_data.ksnd_tconn_lock); - CFS_INIT_LIST_HEAD(&ks_data.ksnd_tconns); - cfs_init_event(&ks_data.ksnd_tconn_exit, TRUE, FALSE); - - ks_data.ksnd_tconn_slab = cfs_mem_cache_create( - "tcon", sizeof(ksock_tconn_t) , 0, 0); - - if (!ks_data.ksnd_tconn_slab) { - rc = -ENOMEM; - goto errorout; - } - - /* initialize tsdu related globals */ - - spin_lock_init(&ks_data.ksnd_tsdu_lock); - CFS_INIT_LIST_HEAD(&ks_data.ksnd_freetsdus); - ks_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */ - ks_data.ksnd_tsdu_slab = cfs_mem_cache_create( - 
"tsdu", ks_data.ksnd_tsdu_size, 0, 0); - - if (!ks_data.ksnd_tsdu_slab) { - rc = -ENOMEM; - cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); - ks_data.ksnd_tconn_slab = NULL; - goto errorout; - } - - /* initialize daemon related globals */ - - spin_lock_init(&ks_data.ksnd_daemon_lock); - CFS_INIT_LIST_HEAD(&ks_data.ksnd_daemons); - cfs_init_event(&ks_data.ksnd_daemon_exit, TRUE, FALSE); - - KsRegisterPnpHandlers(); - -errorout: - - return rc; -} - - -/* - * ks_fini_tdi_data - * finalize the global data in ksockal_data - * - * Arguments: - * N/A - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -void -ks_fini_tdi_data() -{ - PKS_TSDU KsTsdu = NULL; - struct list_head * list = NULL; - - /* clean up the pnp handler and address slots */ - KsDeregisterPnpHandlers(); - - /* we need wait until all the tconn are freed */ - spin_lock(&(ks_data.ksnd_tconn_lock)); - - if (list_empty(&(ks_data.ksnd_tconns))) { - cfs_wake_event(&ks_data.ksnd_tconn_exit); - } - spin_unlock(&(ks_data.ksnd_tconn_lock)); - - /* now wait on the tconn exit event */ - cfs_wait_event(&ks_data.ksnd_tconn_exit, 0); - - /* it's safe to delete the tconn slab ... */ - cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); - ks_data.ksnd_tconn_slab = NULL; - - /* clean up all the tsud buffers in the free list */ - spin_lock(&(ks_data.ksnd_tsdu_lock)); - list_for_each (list, &ks_data.ksnd_freetsdus) { - KsTsdu = list_entry (list, KS_TSDU, Link); - - cfs_mem_cache_free( - ks_data.ksnd_tsdu_slab, - KsTsdu ); - } - spin_unlock(&(ks_data.ksnd_tsdu_lock)); - - /* it's safe to delete the tsdu slab ... */ - cfs_mem_cache_destroy(ks_data.ksnd_tsdu_slab); - ks_data.ksnd_tsdu_slab = NULL; - - /* good! 
it's smooth to do the cleaning up...*/ -} - -/* - * ks_create_child_tconn - * Create the backlog child connection for a listener - * - * Arguments: - * parent: the listener daemon connection - * - * Return Value: - * the child connection or NULL in failure - * - * Notes: - * N/A - */ - -ksock_tconn_t * -ks_create_child_tconn( - ksock_tconn_t * parent - ) -{ - NTSTATUS status; - ksock_tconn_t * backlog; - - /* allocate the tdi connecton object */ - backlog = ks_create_tconn(); - - if (!backlog) { - goto errorout; - } - - /* initialize the tconn as a child */ - ks_init_child(backlog); - - - /* now bind it */ - if (ks_bind_tconn(backlog, parent, 0, 0) < 0) { - ks_free_tconn(backlog); - backlog = NULL; - goto errorout; - } - - /* open the connection object */ - status = KsOpenConnection( - &(backlog->kstc_dev), - (PVOID)backlog, - &(backlog->child.kstc_info.Handle), - &(backlog->child.kstc_info.FileObject) - ); - - if (!NT_SUCCESS(status)) { - - ks_put_tconn(backlog); - backlog = NULL; - cfs_enter_debugger(); - goto errorout; - } - - /* associate it now ... */ - status = KsAssociateAddress( - backlog->kstc_addr.Handle, - backlog->child.kstc_info.FileObject - ); - - if (!NT_SUCCESS(status)) { - - ks_put_tconn(backlog); - backlog = NULL; - cfs_enter_debugger(); - goto errorout; - } - - backlog->kstc_state = ksts_associated; - -errorout: - - return backlog; -} - -/* - * ks_replenish_backlogs( - * to replenish the backlogs listening... 
- * - * Arguments: - * tconn: the parent listen tdi connect - * nbacklog: number fo child connections in queue - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_replenish_backlogs( - ksock_tconn_t * parent, - int nbacklog - ) -{ - ksock_tconn_t * backlog; - int n = 0; - - /* calculate how many backlogs needed */ - if ( ( parent->listener.kstc_listening.num + - parent->listener.kstc_accepted.num ) < nbacklog ) { - n = nbacklog - ( parent->listener.kstc_listening.num + - parent->listener.kstc_accepted.num ); - } else { - n = 0; - } - - while (n--) { - - /* create the backlog child tconn */ - backlog = ks_create_child_tconn(parent); - - spin_lock(&(parent->kstc_lock)); - - if (backlog) { - spin_lock(&backlog->kstc_lock); - /* attch it into the listing list of daemon */ - list_add( &backlog->child.kstc_link, - &parent->listener.kstc_listening.list ); - parent->listener.kstc_listening.num++; - - backlog->child.kstc_queued = TRUE; - spin_unlock(&backlog->kstc_lock); - } else { - cfs_enter_debugger(); - } - - spin_unlock(&(parent->kstc_lock)); - } -} - -/* - * ks_start_listen - * setup the listener tdi connection and make it listen - * on the user specified ip address and port. - * - * Arguments: - * tconn: the parent listen tdi connect - * nbacklog: number fo child connections in queue - * - * Return Value: - * ks error code >=: success; otherwise error. 
- * - * Notes: - * N/A - */ - -int -ks_start_listen(ksock_tconn_t *tconn, int nbacklog) -{ - int rc = 0; - - /* now replenish the backlogs */ - ks_replenish_backlogs(tconn, nbacklog); - - /* set the event callback handlers */ - rc = ks_set_handlers(tconn); - - if (rc < 0) { - return rc; - } - - spin_lock(&(tconn->kstc_lock)); - tconn->listener.nbacklog = nbacklog; - tconn->kstc_state = ksts_listening; - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED); - spin_unlock(&(tconn->kstc_lock)); - - return rc; -} - -void -ks_stop_listen(ksock_tconn_t *tconn) -{ - struct list_head * list; - ksock_tconn_t * backlog; - - /* reset all tdi event callbacks to NULL */ - ks_reset_handlers (tconn); - - spin_lock(&tconn->kstc_lock); - - cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED); - - /* cleanup all the listening backlog child connections */ - list_for_each (list, &(tconn->listener.kstc_listening.list)) { - backlog = list_entry(list, ksock_tconn_t, child.kstc_link); - - /* destory and free it */ - ks_put_tconn(backlog); - } - - spin_unlock(&tconn->kstc_lock); - - /* wake up it from the waiting on new incoming connections */ - KeSetEvent(&tconn->listener.kstc_accept_event, 0, FALSE); - - /* free the listening daemon tconn */ - ks_put_tconn(tconn); -} - - -/* - * ks_wait_child_tconn - * accept a child connection from peer - * - * Arguments: - * parent: the daemon tdi connection listening - * child: to contain the accepted connection - * - * Return Value: - * ks error code; - * - * Notes: - * N/A - */ - -int -ks_wait_child_tconn( - ksock_tconn_t * parent, - ksock_tconn_t ** child - ) -{ - struct list_head * tmp; - ksock_tconn_t * backlog = NULL; - - ks_replenish_backlogs(parent, parent->listener.nbacklog); - - spin_lock(&(parent->kstc_lock)); - - if (parent->listener.kstc_listening.num <= 0) { - spin_unlock(&(parent->kstc_lock)); - return -1; - } - -again: - - /* check the listening queue and try to search the accepted connecton */ - - list_for_each(tmp, 
&(parent->listener.kstc_listening.list)) { - backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link); - - spin_lock(&(backlog->kstc_lock)); - - if (backlog->child.kstc_accepted) { - - LASSERT(backlog->kstc_state == ksts_connected); - LASSERT(backlog->child.kstc_busy); - - list_del(&(backlog->child.kstc_link)); - list_add(&(backlog->child.kstc_link), - &(parent->listener.kstc_accepted.list)); - parent->listener.kstc_accepted.num++; - parent->listener.kstc_listening.num--; - backlog->child.kstc_queueno = 1; - - spin_unlock(&(backlog->kstc_lock)); - - break; - } else { - spin_unlock(&(backlog->kstc_lock)); - backlog = NULL; - } - } - - spin_unlock(&(parent->kstc_lock)); - - /* we need wait until new incoming connections are requested - or the case of shuting down the listenig daemon thread */ - if (backlog == NULL) { - - NTSTATUS Status; - - Status = KeWaitForSingleObject( - &(parent->listener.kstc_accept_event), - Executive, - KernelMode, - FALSE, - NULL - ); - - spin_lock(&(parent->kstc_lock)); - - /* check whether it's exptected to exit ? 
*/ - if (!cfs_is_flag_set(parent->kstc_flags, KS_TCONN_DAEMON_STARTED)) { - spin_unlock(&(parent->kstc_lock)); - } else { - goto again; - } - } - - if (backlog) { - /* query the local ip address of the connection */ - ks_query_local_ipaddr(backlog); - } - - *child = backlog; - - return 0; -} - -int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) -{ - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - spin_lock(&ks_data.ksnd_addrs_lock); - - list = ks_data.ksnd_addrs_list.Flink; - while (list != &ks_data.ksnd_addrs_list) { - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - if (_stricmp(name, &slot->iface[0]) == 0) { - *up = slot->up; - *ip = slot->ip_addr; - *mask = slot->netmask; - break; - } - list = list->Flink; - slot = NULL; - } - - spin_unlock(&ks_data.ksnd_addrs_lock); - - return (int)(slot == NULL); -} - -int libcfs_ipif_enumerate(char ***names) -{ - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - int nips = 0; - - spin_lock(&ks_data.ksnd_addrs_lock); - - *names = cfs_alloc(sizeof(char *) * ks_data.ksnd_naddrs, CFS_ALLOC_ZERO); - if (*names == NULL) { - goto errorout; - } - - list = ks_data.ksnd_addrs_list.Flink; - while (list != &ks_data.ksnd_addrs_list) { - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - list = list->Flink; - (*names)[nips++] = slot->iface; - cfs_assert(nips <= ks_data.ksnd_naddrs); - } - - cfs_assert(nips == ks_data.ksnd_naddrs); - -errorout: - - spin_unlock(&ks_data.ksnd_addrs_lock); - return nips; -} - -void libcfs_ipif_free_enumeration(char **names, int n) -{ - if (names) { - cfs_free(names); - } -} - -int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog) -{ - int rc = 0; - ksock_tconn_t * parent; - - parent = ks_create_tconn(); - if (!parent) { - rc = -ENOMEM; - goto errorout; - } - - /* initialize the tconn as a listener */ - ks_init_listener(parent); - - /* bind the daemon->tconn */ - rc = ks_bind_tconn(parent, NULL, ip, (unsigned short)port); - - if (rc < 
0) { - ks_free_tconn(parent); - goto errorout; - } - - /* create listening children and make it to listen state*/ - rc = ks_start_listen(parent, backlog); - if (rc < 0) { - ks_stop_listen(parent); - goto errorout; - } - - *sockp = parent; - -errorout: - - return rc; -} - -int libcfs_sock_accept(struct socket **newsockp, struct socket *sock) -{ - /* wait for incoming connecitons */ - return ks_wait_child_tconn(sock, newsockp); -} - -void libcfs_sock_abort_accept(struct socket *sock) -{ - LASSERT(sock->kstc_type == kstt_listener); - - spin_lock(&(sock->kstc_lock)); - - /* clear the daemon flag */ - cfs_clear_flag(sock->kstc_flags, KS_TCONN_DAEMON_STARTED); - - /* wake up it from the waiting on new incoming connections */ - KeSetEvent(&sock->listener.kstc_accept_event, 0, FALSE); - - spin_unlock(&(sock->kstc_lock)); -} - -/* - * libcfs_sock_connect - * build a conntion between local ip/port and the peer ip/port. - * - * Arguments: - * laddr: local ip address - * lport: local port number - * paddr: peer's ip address - * pport: peer's port number - * - * Return Value: - * int: return code ... 
- * - * Notes: - * N/A - */ - - -int libcfs_sock_connect(struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - ksock_tconn_t * tconn = NULL; - int rc = 0; - - *sockp = NULL; - - KsPrint((1, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n", - peer_ip, peer_port, local_ip, local_port )); - - /* create the tdi connecion structure */ - tconn = ks_create_tconn(); - if (!tconn) { - rc = -ENOMEM; - goto errorout; - } - - /* initialize the tdi sender connection */ - ks_init_sender(tconn); - - /* bind the local ip address with the tconn */ - rc = ks_bind_tconn(tconn, NULL, local_ip, (unsigned short)local_port); - if (rc < 0) { - KsPrint((0, "libcfs_sock_connect: failed to bind address %x:%d...\n", - local_ip, local_port )); - ks_free_tconn(tconn); - goto errorout; - } - - /* connect to the remote peer */ - rc = ks_build_tconn(tconn, peer_ip, (unsigned short)peer_port); - if (rc < 0) { - KsPrint((0, "libcfs_sock_connect: failed to connect %x:%d ...\n", - peer_ip, peer_port )); - - ks_put_tconn(tconn); - goto errorout; - } - - *sockp = tconn; - -errorout: - - return rc; -} - -int libcfs_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize) -{ - return 0; -} - -int libcfs_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize) -{ - return 0; -} - -int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port) -{ - PTRANSPORT_ADDRESS taddr = NULL; - - spin_lock(&socket->kstc_lock); - if (remote) { - if (socket->kstc_type == kstt_sender) { - taddr = socket->sender.kstc_info.Remote; - } else if (socket->kstc_type == kstt_child) { - taddr = socket->child.kstc_info.Remote; - } - } else { - taddr = &(socket->kstc_addr.Tdi); - } - - if (taddr) { - PTDI_ADDRESS_IP addr = (PTDI_ADDRESS_IP)(&(taddr->Address[0].Address)); - if (ip != NULL) - *ip = ntohl (addr->in_addr); - if (port != NULL) - *port = ntohs (addr->sin_port); - } else { - spin_unlock(&socket->kstc_lock); - return -ENOTCONN; 
- } - - spin_unlock(&socket->kstc_lock); - return 0; -} - -int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - ksock_mdl_t * mdl; - - int offset = 0; - - while (nob > offset) { - - /* lock the user buffer */ - rc = ks_lock_buffer( (char *)buffer + offset, - FALSE, nob - offset, IoReadAccess, &mdl ); - - if (rc < 0) { - return (rc); - } - - /* send out the whole mdl */ - rc = ks_send_mdl( sock, NULL, mdl, nob - offset, 0 ); - - if (rc > 0) { - offset += rc; - } else { - return (rc); - } - } - - return (0); -} - -int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - ksock_mdl_t * mdl; - - int offset = 0; - - while (nob > offset) { - - /* lock the user buffer */ - rc = ks_lock_buffer( (char *)buffer + offset, - FALSE, nob - offset, IoWriteAccess, &mdl ); - - if (rc < 0) { - return (rc); - } - - /* recv the requested buffer */ - rc = ks_recv_mdl( sock, mdl, nob - offset, 0 ); - - if (rc > 0) { - offset += rc; - } else { - return (rc); - } - } - - return (0); -} - -void libcfs_sock_release(struct socket *sock) -{ - if (sock->kstc_type == kstt_listener && - sock->kstc_state == ksts_listening) { - ks_stop_listen(sock); - } else { - ks_put_tconn(sock); - } -} diff --git a/lnet/libcfs/winnt/winnt-tracefile.c b/lnet/libcfs/winnt/winnt-tracefile.c deleted file mode 100644 index 61ba735ccf4a9b3fb7badbf383c123ef081994e3..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-tracefile.c +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. 
Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -#ifndef get_cpu -#define get_cpu() smp_processor_id() -#define put_cpu() do { } while (0) -#endif - -#define TCD_TYPE_MAX 1 - -event_t tracefile_event; - -void tracefile_init_arch() -{ - int i; - int j; - struct trace_cpu_data *tcd; - - cfs_init_event(&tracefile_event, TRUE, TRUE); - - /* initialize trace_data */ - memset(trace_data, 0, sizeof(trace_data)); - for (i = 0; i < TCD_TYPE_MAX; i++) { - trace_data[i]=cfs_alloc(sizeof(struct trace_data_union)*NR_CPUS, 0); - if (trace_data[i] == NULL) - goto out; - } - - /* arch related info initialized */ - tcd_for_each(tcd, i, j) { - tcd->tcd_pages_factor = 100; /* Only one type */ - tcd->tcd_cpu = j; - tcd->tcd_type = i; - } - - memset(trace_console_buffers, 0, sizeof(trace_console_buffers)); - - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; j < 1; j++) { - trace_console_buffers[i][j] = - cfs_alloc(TRACE_CONSOLE_BUFFER_SIZE, - CFS_ALLOC_ZERO); - - if (trace_console_buffers[i][j] == NULL) - goto out; - } - } - - return 0; - -out: - tracefile_fini_arch(); - KsPrint((0, "lnet: No enough memory\n")); - return -ENOMEM; -} - -void tracefile_fini_arch() -{ - int i; - int j; - - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; j < 2; j++) { - if (trace_console_buffers[i][j] != NULL) { - cfs_free(trace_console_buffers[i][j]); - trace_console_buffers[i][j] = NULL; - } - } - } - - for (i = 0; trace_data[i] != NULL; i++) { - cfs_free(trace_data[i]); - 
trace_data[i] = NULL; - } -} - -void tracefile_read_lock() -{ - cfs_wait_event(&tracefile_event, 0); -} - -void tracefile_read_unlock() -{ - cfs_wake_event(&tracefile_event); -} - -void tracefile_write_lock() -{ - cfs_wait_event(&tracefile_event, 0); -} - -void tracefile_write_unlock() -{ - cfs_wake_event(&tracefile_event); -} - -char * -trace_get_console_buffer(void) -{ -#pragma message ("is there possible problem with pre-emption ?") - int cpu = (int) KeGetCurrentProcessorNumber(); - return trace_console_buffers[cpu][0]; -} - -void -trace_put_console_buffer(char *buffer) -{ -} - -struct trace_cpu_data * -trace_get_tcd(void) -{ -#pragma message("todo: return NULL if in interrupt context") - - int cpu = (int) KeGetCurrentProcessorNumber(); - return &(*trace_data[0])[cpu].tcd; -} - -void -trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags) -{ -} - -int -trace_lock_tcd(struct trace_cpu_data *tcd) -{ - __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); - return 1; -} - -void -trace_unlock_tcd(struct trace_cpu_data *tcd) -{ - __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); -} - -void -set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, - const int line, unsigned long stack) -{ - struct timeval tv; - - do_gettimeofday(&tv); - - header->ph_subsys = subsys; - header->ph_mask = mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_sec = (__u32)tv.tv_sec; - header->ph_usec = tv.tv_usec; - header->ph_stack = stack; - header->ph_pid = current->pid; - header->ph_line_num = line; - header->ph_extern_pid = 0; - return; -} - -void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn) -{ - char *prefix = NULL, *ptype = NULL; - - if ((mask & D_EMERG) != 0) { - prefix = "LustreError"; - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = "LustreError"; - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = "Lustre"; - ptype = KERN_WARNING; - } else if ((mask & 
libcfs_printk) != 0 || (mask & D_CONSOLE)) { - prefix = "Lustre"; - ptype = KERN_INFO; - } - - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %s", ptype, prefix, buf); - } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %s", ptype, prefix, hdr->ph_pid, - hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf); - } - return; -} - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) -{ - return 1; -} - -int trace_max_debug_mb(void) -{ - int total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT)); - - return MAX(512, (total_mb * 80)/100); -} - -void -trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) -{ -#error "tbd" -} - diff --git a/lnet/libcfs/winnt/winnt-usr.c b/lnet/libcfs/winnt/winnt-usr.c deleted file mode 100644 index f79347b8893ba48aa9e9399892f5c2195985b681..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-usr.c +++ /dev/null @@ -1,85 +0,0 @@ - -#ifndef __KERNEL__ - -#include <stdio.h> -#include <stdlib.h> -#include <io.h> -#include <time.h> -#include <windows.h> - -void portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, - char *format, ...) { - } - -int cfs_proc_mknod(const char *path, unsigned short mode, unsigned int dev) -{ - return 0; -} - - -void print_last_error(char* Prefix) -{ - LPVOID lpMsgBuf; - - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - GetLastError(), - 0, - (LPTSTR) &lpMsgBuf, - 0, - NULL - ); - - printf("%s %s", Prefix, (LPTSTR) lpMsgBuf); - - LocalFree(lpMsgBuf); -} - -// -// The following declarations are defined in io.h of VC -// sys/types.h will conflict with io.h, so we need place -// these declartions here. 
- -#ifdef __cplusplus -extern "C" { -#endif - void - __declspec (naked) __cdecl _chkesp(void) - { -#if _X86_ - __asm { jz exit_chkesp }; - __asm { int 3 }; - exit_chkesp: - __asm { ret }; -#endif - } -#ifdef __cplusplus -} -#endif - -unsigned int sleep (unsigned int seconds) -{ - Sleep(seconds * 1000); - return 0; -} - -int gethostname(char * name, int namelen) -{ - return 0; -} - -int ioctl ( - int handle, - int cmd, - void *buffer - ) -{ - printf("hello, world\n"); - return 0; -} - -#endif /* __KERNEL__ */ \ No newline at end of file diff --git a/lnet/libcfs/winnt/winnt-utils.c b/lnet/libcfs/winnt/winnt-utils.c deleted file mode 100644 index cd33aa2a0dc5b28a0f5f33dbda37628fa9d2fdac..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-utils.c +++ /dev/null @@ -1,158 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - - -/* - * miscellaneous libcfs stuff - */ -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/types.h> - -/* - * Convert server error code to client format. Error codes are from - * Linux errno.h, so for Linux client---identity. 
- */ -int convert_server_error(__u64 ecode) -{ - return cfs_error_code((NTSTATUS)ecode); -} - -/* - * convert <fcntl.h> flag from client to server. - * - * nt kernel uses several members to describe the open flags - * such as DesiredAccess/ShareAccess/CreateDisposition/CreateOptions - * so it's better to convert when using, not here. - */ - -int convert_client_oflag(int cflag, int *result) -{ - *result = 0; - return 0; -} - - -int cfs_error_code(NTSTATUS Status) -{ - switch (Status) { - - case STATUS_ACCESS_DENIED: - return (-EACCES); - - case STATUS_ACCESS_VIOLATION: - return (-EFAULT); - - case STATUS_BUFFER_TOO_SMALL: - return (-ETOOSMALL); - - case STATUS_INVALID_PARAMETER: - return (-EINVAL); - - case STATUS_NOT_IMPLEMENTED: - case STATUS_NOT_SUPPORTED: - return (-EOPNOTSUPP); - - case STATUS_INVALID_ADDRESS: - case STATUS_INVALID_ADDRESS_COMPONENT: - return (-EADDRNOTAVAIL); - - case STATUS_NO_SUCH_DEVICE: - case STATUS_NO_SUCH_FILE: - case STATUS_OBJECT_NAME_NOT_FOUND: - case STATUS_OBJECT_PATH_NOT_FOUND: - case STATUS_NETWORK_BUSY: - case STATUS_INVALID_NETWORK_RESPONSE: - case STATUS_UNEXPECTED_NETWORK_ERROR: - return (-ENETDOWN); - - case STATUS_BAD_NETWORK_PATH: - case STATUS_NETWORK_UNREACHABLE: - case STATUS_PROTOCOL_UNREACHABLE: - return (-ENETUNREACH); - - case STATUS_LOCAL_DISCONNECT: - case STATUS_TRANSACTION_ABORTED: - case STATUS_CONNECTION_ABORTED: - return (-ECONNABORTED); - - case STATUS_REMOTE_DISCONNECT: - case STATUS_LINK_FAILED: - case STATUS_CONNECTION_DISCONNECTED: - case STATUS_CONNECTION_RESET: - case STATUS_PORT_UNREACHABLE: - return (-ECONNRESET); - - case STATUS_PAGEFILE_QUOTA: - case STATUS_NO_MEMORY: - case STATUS_CONFLICTING_ADDRESSES: - case STATUS_QUOTA_EXCEEDED: - case STATUS_TOO_MANY_PAGING_FILES: - case STATUS_INSUFFICIENT_RESOURCES: - case STATUS_WORKING_SET_QUOTA: - case STATUS_COMMITMENT_LIMIT: - case STATUS_TOO_MANY_ADDRESSES: - case STATUS_REMOTE_RESOURCES: - return (-ENOBUFS); - - case STATUS_INVALID_CONNECTION: - 
return (-ENOTCONN); - - case STATUS_PIPE_DISCONNECTED: - return (-ESHUTDOWN); - - case STATUS_TIMEOUT: - case STATUS_IO_TIMEOUT: - case STATUS_LINK_TIMEOUT: - return (-ETIMEDOUT); - - case STATUS_REMOTE_NOT_LISTENING: - case STATUS_CONNECTION_REFUSED: - return (-ECONNREFUSED); - - case STATUS_HOST_UNREACHABLE: - return (-EHOSTUNREACH); - - case STATUS_PENDING: - case STATUS_DEVICE_NOT_READY: - return (-EAGAIN); - - case STATUS_CANCELLED: - case STATUS_REQUEST_ABORTED: - return (-EINTR); - - case STATUS_BUFFER_OVERFLOW: - case STATUS_INVALID_BUFFER_SIZE: - return (-EMSGSIZE); - - } - - if (NT_SUCCESS(Status)) - return 0; - - return (-EINVAL); -} - - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{ -} - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - return NULL; -} diff --git a/lnet/lnet/.cvsignore b/lnet/lnet/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/lnet/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/lnet/Info.plist b/lnet/lnet/Info.plist deleted file mode 100644 index 2b3967f0ff7429418bdbccc1ab3d3afd302e22a9..0000000000000000000000000000000000000000 --- a/lnet/lnet/Info.plist +++ /dev/null @@ -1,37 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>lnet</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.lnet</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - <string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - 
<key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - <string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - <key>com.clusterfs.lustre.libcfs</key> - <string>1.0.0</string> - </dict> -</dict> -</plist> diff --git a/lnet/lnet/Makefile.in b/lnet/lnet/Makefile.in deleted file mode 100644 index 3bc86f6577534f02fe4dfc502eb592411f57d41a..0000000000000000000000000000000000000000 --- a/lnet/lnet/Makefile.in +++ /dev/null @@ -1,10 +0,0 @@ -MODULES := lnet - -lnet-objs := api-errno.o api-ni.o config.o -lnet-objs += lib-me.o lib-msg.o lib-eq.o lib-md.o -lnet-objs += lib-move.o module.o lo.o -lnet-objs += router.o router_proc.o acceptor.o peer.o - -default: all - -@INCLUDE_RULES@ diff --git a/lnet/lnet/acceptor.c b/lnet/lnet/acceptor.c deleted file mode 100644 index be1abd22b43323356093f7b9253fae8871fc2f7b..0000000000000000000000000000000000000000 --- a/lnet/lnet/acceptor.c +++ /dev/null @@ -1,857 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -#ifdef __KERNEL__ -static char *accept = "secure"; -CFS_MODULE_PARM(accept, "s", charp, 0444, - "Accept connections (secure|all|none)"); - -static int accept_port = 988; -CFS_MODULE_PARM(accept_port, "i", int, 0444, - "Acceptor's port (same on all nodes)"); - -static int accept_backlog = 127; -CFS_MODULE_PARM(accept_backlog, "i", int, 0444, - "Acceptor's listen backlog"); - -static int accept_timeout = 5; -CFS_MODULE_PARM(accept_timeout, "i", int, 0644, - "Acceptor's timeout (seconds)"); - -struct { - int pta_shutdown; - cfs_socket_t *pta_sock; - struct semaphore pta_signal; -} lnet_acceptor_state; - -int -lnet_acceptor_timeout(void) -{ - return accept_timeout; -} -EXPORT_SYMBOL(lnet_acceptor_timeout); - -int -lnet_acceptor_port(void) -{ - return accept_port; -} -EXPORT_SYMBOL(lnet_acceptor_port); - -void -lnet_connect_console_error (int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int peer_port) -{ - switch (rc) { - /* "normal" errors */ - case -ECONNREFUSED: - CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u " - "on port %d was refused: " - "check that Lustre is running on that node.\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - case -EHOSTUNREACH: - case -ENETUNREACH: - CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u " - "was unreachable: the network or that node may " - "be down, or Lustre may be misconfigured.\n", - libcfs_nid2str(peer_nid), HIPQUAD(peer_ip)); - break; - case -ETIMEDOUT: - CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u on " - "port %d took too long: that node may be hung " - "or experiencing high load.\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - case -ECONNRESET: - LCONSOLE_ERROR_MSG(0x11b, 
"Connection to %s at host %u.%u.%u.%u" - " on port %d was reset: " - "is it running a compatible version of " - "Lustre and is %s one of its NIDs?\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port, - libcfs_nid2str(peer_nid)); - break; - case -EPROTO: - LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at " - "host %u.%u.%u.%u on port %d: is it running " - "a compatible version of Lustre?\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - case -EADDRINUSE: - LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to " - "connect to %s at host %u.%u.%u.%u on port " - "%d\n", libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - default: - LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s" - " at host %u.%u.%u.%u on port %d\n", rc, - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - } -} -EXPORT_SYMBOL(lnet_connect_console_error); - -int -lnet_connect(cfs_socket_t **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port) -{ - lnet_acceptor_connreq_t cr; - cfs_socket_t *sock; - int rc; - int port; - int fatal; - - CLASSERT (sizeof(cr) <= 16); /* not too big to be on the stack */ - - for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT; - port >= LNET_ACCEPTOR_MIN_RESERVED_PORT; - --port) { - /* Iterate through reserved ports. */ - - rc = libcfs_sock_connect(&sock, &fatal, - local_ip, port, - peer_ip, peer_port); - if (rc != 0) { - if (fatal) - goto failed; - continue; - } - - CLASSERT (LNET_PROTO_ACCEPTOR_VERSION == 1); - - if (the_lnet.ln_ptlcompat != 2) { - /* When portals compatibility is "strong", simply - * connect (i.e. send no acceptor connection request). - * Othewise send an acceptor connection request. I can - * have no portals peers so everyone else should - * understand my protocol. 
*/ - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - cr.acr_nid = peer_nid; - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 4) != 0) { - cr.acr_version++; - the_lnet.ln_testprotocompat &= ~4; - } - if ((the_lnet.ln_testprotocompat & 8) != 0) { - cr.acr_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~8; - } - LNET_UNLOCK(); - } - - rc = libcfs_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - if (rc != 0) - goto failed_sock; - } - - *sockp = sock; - return 0; - } - - rc = -EADDRINUSE; - goto failed; - - failed_sock: - libcfs_sock_release(sock); - failed: - lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port); - return rc; -} -EXPORT_SYMBOL(lnet_connect); - -static inline int -lnet_accept_magic(__u32 magic, __u32 constant) -{ - return (magic == constant || - magic == __swab32(constant)); -} - -int -lnet_accept(lnet_ni_t *blind_ni, cfs_socket_t *sock, __u32 magic) -{ - lnet_acceptor_connreq_t cr; - __u32 peer_ip; - int peer_port; - int rc; - int flip; - lnet_ni_t *ni; - char *str; - - /* CAVEAT EMPTOR: I may be called by an LND in any thread's context if - * I passed the new socket "blindly" to the single NI that needed an - * acceptor. If so, blind_ni != NULL... */ - - LASSERT (sizeof(cr) <= 16); /* not too big for the stack */ - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT (rc == 0); /* we succeeded before */ - - if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) { - - if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) { - /* future version compatibility! - * When LNET unifies protocols over all LNDs, the first - * thing sent will be a version query. 
I send back - * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */ - - memset (&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - rc = libcfs_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - - if (rc != 0) - CERROR("Error sending magic+version in response" - "to LNET magic from %u.%u.%u.%u: %d\n", - HIPQUAD(peer_ip), rc); - return -EPROTO; - } - - if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) - str = "'old' socknal/tcpnal"; - else if (lnet_accept_magic(magic, LNET_PROTO_RA_MAGIC)) - str = "'old' ranal"; - else if (lnet_accept_magic(magic, LNET_PROTO_OPENIB_MAGIC)) - str = "'old' openibnal"; - else - str = "unrecognised"; - - LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %u.%u.%u.%u" - " magic %08x: %s acceptor protocol\n", - HIPQUAD(peer_ip), magic, str); - return -EPROTO; - } - - flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC); - - rc = libcfs_sock_read(sock, &cr.acr_version, - sizeof(cr.acr_version), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request version from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - return -EIO; - } - - if (flip) - __swab32s(&cr.acr_version); - - if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) { - /* future version compatibility! - * An acceptor-specific protocol rev will first send a version - * query. I send back my current version to tell her I'm - * "old". 
*/ - int peer_version = cr.acr_version; - - memset (&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - - rc = libcfs_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - - if (rc != 0) - CERROR("Error sending magic+version in response" - "to version %d from %u.%u.%u.%u: %d\n", - peer_version, HIPQUAD(peer_ip), rc); - return -EPROTO; - } - - rc = libcfs_sock_read(sock, &cr.acr_nid, - sizeof(cr) - - offsetof(lnet_acceptor_connreq_t, acr_nid), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - return -EIO; - } - - if (flip) - __swab64s(&cr.acr_nid); - - ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid)); - if (ni == NULL || /* no matching net */ - ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */ - if (ni != NULL) - lnet_ni_decref(ni); - LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %u.%u.%u.%u" - " for %s: No matching NI\n", - HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - if (ni->ni_lnd->lnd_accept == NULL) { - /* This catches a request for the loopback LND */ - lnet_ni_decref(ni); - LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %u.%u.%u.%u" - " for %s: NI doesn not accept IP connections\n", - HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u%s\n", - libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip), - blind_ni == NULL ? "" : " (blind)"); - - if (blind_ni == NULL) { - /* called by the acceptor: call into the requested NI... */ - rc = ni->ni_lnd->lnd_accept(ni, sock); - } else { - /* portals_compatible set and the (only) NI called me to verify - * and skip the connection request... 
*/ - LASSERT (the_lnet.ln_ptlcompat != 0); - LASSERT (ni == blind_ni); - rc = 0; - } - - lnet_ni_decref(ni); - return rc; -} -EXPORT_SYMBOL(lnet_accept); - -int -lnet_acceptor(void *arg) -{ - char name[16]; - cfs_socket_t *newsock; - int rc; - int n_acceptor_nis; - __u32 magic; - __u32 peer_ip; - int peer_port; - lnet_ni_t *blind_ni = NULL; - int secure = (int)((unsigned long)arg); - - LASSERT (lnet_acceptor_state.pta_sock == NULL); - - if (the_lnet.ln_ptlcompat != 0) { - /* When portals_compatibility is enabled, peers may connect - * without sending an acceptor connection request. There is no - * ambiguity about which network the peer wants to connect to - * since there can only be 1 network, so I pass connections - * "blindly" to it. */ - n_acceptor_nis = lnet_count_acceptor_nis(&blind_ni); - LASSERT (n_acceptor_nis == 1); - LASSERT (blind_ni != NULL); - } - - snprintf(name, sizeof(name), "acceptor_%03d", accept_port); - cfs_daemonize(name); - cfs_block_allsigs(); - - rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock, - 0, accept_port, accept_backlog); - if (rc != 0) { - if (rc == -EADDRINUSE) - LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port" - " %d: port already in use\n", - accept_port); - else - LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port " - "%d: unexpected error %d\n", - accept_port, rc); - - lnet_acceptor_state.pta_sock = NULL; - } else { - LCONSOLE(0, "Accept %s, port %d%s\n", - accept, accept_port, - blind_ni == NULL ? 
"" : " (proto compatible)"); - } - - /* set init status and unblock parent */ - lnet_acceptor_state.pta_shutdown = rc; - mutex_up(&lnet_acceptor_state.pta_signal); - - if (rc != 0) - return rc; - - while (!lnet_acceptor_state.pta_shutdown) { - - rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock); - if (rc != 0) { - if (rc != -EAGAIN) { - CWARN("Accept error %d: pausing...\n", rc); - cfs_pause(cfs_time_seconds(1)); - } - continue; - } - - rc = libcfs_sock_getaddr(newsock, 1, &peer_ip, &peer_port); - if (rc != 0) { - CERROR("Can't determine new connection's address\n"); - goto failed; - } - - if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - CERROR("Refusing connection from %u.%u.%u.%u: " - "insecure port %d\n", - HIPQUAD(peer_ip), peer_port); - goto failed; - } - - if (blind_ni != NULL) { - rc = blind_ni->ni_lnd->lnd_accept(blind_ni, newsock); - if (rc != 0) { - CERROR("NI %s refused 'blind' connection from " - "%u.%u.%u.%u\n", - libcfs_nid2str(blind_ni->ni_nid), - HIPQUAD(peer_ip)); - goto failed; - } - continue; - } - - rc = libcfs_sock_read(newsock, &magic, sizeof(magic), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - goto failed; - } - - rc = lnet_accept(NULL, newsock, magic); - if (rc != 0) - goto failed; - - continue; - - failed: - libcfs_sock_release(newsock); - } - - libcfs_sock_release(lnet_acceptor_state.pta_sock); - lnet_acceptor_state.pta_sock = NULL; - - if (blind_ni != NULL) - lnet_ni_decref(blind_ni); - - LCONSOLE(0,"Acceptor stopping\n"); - - /* unblock lnet_acceptor_stop() */ - mutex_up(&lnet_acceptor_state.pta_signal); - return 0; -} - -int -lnet_acceptor_start(void) -{ - long pid; - long secure; - - LASSERT (lnet_acceptor_state.pta_sock == NULL); - init_mutex_locked(&lnet_acceptor_state.pta_signal); - - if (!strcmp(accept, "secure")) { - secure = 1; - } else if (!strcmp(accept, "all")) { - secure = 0; - } else if (!strcmp(accept, 
"none")) { - return 0; - } else { - LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n", - accept); - return -EINVAL; - } - - if (lnet_count_acceptor_nis(NULL) == 0) /* not required */ - return 0; - - pid = cfs_kernel_thread(lnet_acceptor, (void *)secure, 0); - if (pid < 0) { - CERROR("Can't start acceptor thread: %ld\n", pid); - return -ESRCH; - } - - mutex_down(&lnet_acceptor_state.pta_signal); /* wait for acceptor to startup */ - - if (!lnet_acceptor_state.pta_shutdown) { - /* started OK */ - LASSERT (lnet_acceptor_state.pta_sock != NULL); - return 0; - } - - LASSERT (lnet_acceptor_state.pta_sock == NULL); - return -ENETDOWN; -} - -void -lnet_acceptor_stop(void) -{ - if (lnet_acceptor_state.pta_sock == NULL) /* not running */ - return; - - lnet_acceptor_state.pta_shutdown = 1; - libcfs_sock_abort_accept(lnet_acceptor_state.pta_sock); - - /* block until acceptor signals exit */ - mutex_down(&lnet_acceptor_state.pta_signal); -} - -#else /* __KERNEL__ */ -#ifdef HAVE_LIBPTHREAD - -static char *accept_type; -static int accept_port = 988; -static int accept_backlog; -static int accept_timeout; - -struct { - int pta_shutdown; - int pta_sock; - struct cfs_completion pta_completion; -} lnet_acceptor_state; - -int -lnet_acceptor_port(void) -{ - return accept_port; -} - -int -lnet_parse_int_tunable(int *value, char *name, int dflt) -{ - char *env = getenv(name); - char *end; - - if (env == NULL) { - *value = dflt; - return 0; - } - - *value = strtoull(env, &end, 0); - if (*end == 0) - return 0; - - CERROR("Can't parse tunable %s=%s\n", name, env); - return -EINVAL; -} - -int -lnet_parse_string_tunable(char **value, char *name, char *dflt) -{ - char *env = getenv(name); - - if (env == NULL) - *value = dflt; - else - *value = env; - - return 0; -} - -int -lnet_acceptor_get_tunables() -{ - int rc; - rc = lnet_parse_string_tunable(&accept_type, "LNET_ACCEPT", "secure"); - - if (rc != 0) - return rc; - - rc = lnet_parse_int_tunable(&accept_port, "LNET_ACCEPT_PORT", 988); 
- - if (rc != 0) - return rc; - - rc = lnet_parse_int_tunable(&accept_backlog, "LNET_ACCEPT_BACKLOG", 127); - - if (rc != 0) - return rc; - - rc = lnet_parse_int_tunable(&accept_timeout, "LNET_ACCEPT_TIMEOUT", 5); - - if (rc != 0) - return rc; - - CDEBUG(D_NET, "accept_type = %s\n", accept_type); - CDEBUG(D_NET, "accept_port = %d\n", accept_port); - CDEBUG(D_NET, "accept_backlog = %d\n", accept_backlog); - CDEBUG(D_NET, "accept_timeout = %d\n", accept_timeout); - return 0; -} - -static inline int -lnet_accept_magic(__u32 magic, __u32 constant) -{ - return (magic == constant || - magic == __swab32(constant)); -} - -/* user-land lnet_accept() isn't used by any LND's directly. So, we don't - * do it visible outside acceptor.c and we can change its prototype - * freely */ -static int -lnet_accept(int sock, __u32 magic, __u32 peer_ip, int peer_port) -{ - int rc, flip; - lnet_acceptor_connreq_t cr; - lnet_ni_t *ni; - - if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) { - LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u magic %08x: " - "unsupported acceptor protocol\n", - HIPQUAD(peer_ip), magic); - return -EPROTO; - } - - flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC); - - rc = libcfs_sock_read(sock, &cr.acr_version, - sizeof(cr.acr_version), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request version from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - return -EIO; - } - - if (flip) - __swab32s(&cr.acr_version); - - if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) - return -EPROTO; - - rc = libcfs_sock_read(sock, &cr.acr_nid, - sizeof(cr) - - offsetof(lnet_acceptor_connreq_t, acr_nid), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - return -EIO; - } - - if (flip) - __swab64s(&cr.acr_nid); - - ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid)); - - if (ni == NULL || /* no matching net */ - ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! 
*/ - if (ni != NULL) - lnet_ni_decref(ni); - LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u for %s: " - " No matching NI\n", - HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - if (ni->ni_lnd->lnd_accept == NULL) { - lnet_ni_decref(ni); - LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u for %s: " - " NI doesn not accept IP connections\n", - HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u\n", - libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip)); - - rc = ni->ni_lnd->lnd_accept(ni, sock); - - lnet_ni_decref(ni); - return rc; -} - -int -lnet_acceptor(void *arg) -{ - char name[16]; - int secure = (int)((unsigned long)arg); - int rc; - int newsock; - __u32 peer_ip; - int peer_port; - __u32 magic; - - snprintf(name, sizeof(name), "acceptor_%03d", accept_port); - cfs_daemonize(name); - cfs_block_allsigs(); - - rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock, - 0, accept_port, accept_backlog); - if (rc != 0) { - if (rc == -EADDRINUSE) - LCONSOLE_ERROR("Can't start acceptor on port %d: " - "port already in use\n", - accept_port); - else - LCONSOLE_ERROR("Can't start acceptor on port %d: " - "unexpected error %d\n", - accept_port, rc); - - } else { - LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port); - } - - /* set init status and unblock parent */ - lnet_acceptor_state.pta_shutdown = rc; - cfs_complete(&lnet_acceptor_state.pta_completion); - - if (rc != 0) - return rc; - - while (!lnet_acceptor_state.pta_shutdown) { - - rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock, - &peer_ip, &peer_port); - if (rc != 0) - continue; - - /* maybe we're waken up with libcfs_sock_abort_accept() */ - if (lnet_acceptor_state.pta_shutdown) { - close(newsock); - break; - } - - if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - CERROR("Refusing connection from %u.%u.%u.%u: " - "insecure port %d\n", - HIPQUAD(peer_ip), peer_port); - goto failed; - } - - rc = 
libcfs_sock_read(newsock, &magic, sizeof(magic), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - goto failed; - } - - rc = lnet_accept(newsock, magic, peer_ip, peer_port); - if (rc != 0) - goto failed; - - continue; - - failed: - close(newsock); - } - - close(lnet_acceptor_state.pta_sock); - LCONSOLE(0,"Acceptor stopping\n"); - - /* unblock lnet_acceptor_stop() */ - cfs_complete(&lnet_acceptor_state.pta_completion); - - return 0; -} - -static int skip_waiting_for_completion; - -int -lnet_acceptor_start(void) -{ - long secure; - int rc; - - rc = lnet_acceptor_get_tunables(); - if (rc != 0) - return rc; - - /* Do nothing if we're liblustre clients */ - if ((the_lnet.ln_pid & LNET_PID_USERFLAG) != 0) - return 0; - - cfs_init_completion(&lnet_acceptor_state.pta_completion); - - if (!strcmp(accept_type, "secure")) { - secure = 1; - } else if (!strcmp(accept_type, "all")) { - secure = 0; - } else if (!strcmp(accept_type, "none")) { - skip_waiting_for_completion = 1; - return 0; - } else { - LCONSOLE_ERROR ("Can't parse 'accept_type=\"%s\"'\n", accept_type); - cfs_fini_completion(&lnet_acceptor_state.pta_completion); - return -EINVAL; - } - - if (lnet_count_acceptor_nis(NULL) == 0) { /* not required */ - skip_waiting_for_completion = 1; - return 0; - } - - rc = cfs_create_thread(lnet_acceptor, (void *)secure); - if (rc != 0) { - CERROR("Can't start acceptor thread: %d\n", rc); - cfs_fini_completion(&lnet_acceptor_state.pta_completion); - return rc; - } - - /* wait for acceptor to startup */ - cfs_wait_for_completion(&lnet_acceptor_state.pta_completion); - - if (!lnet_acceptor_state.pta_shutdown) - return 0; - - cfs_fini_completion(&lnet_acceptor_state.pta_completion); - return -ENETDOWN; -} - -void -lnet_acceptor_stop(void) -{ - /* Do nothing if we're liblustre clients */ - if ((the_lnet.ln_pid & LNET_PID_USERFLAG) != 0) - return; - - if (!skip_waiting_for_completion) { - 
lnet_acceptor_state.pta_shutdown = 1; - libcfs_sock_abort_accept(accept_port); - - /* block until acceptor signals exit */ - cfs_wait_for_completion(&lnet_acceptor_state.pta_completion); - } - - cfs_fini_completion(&lnet_acceptor_state.pta_completion); -} -#else -int -lnet_acceptor_start(void) -{ - return 0; -} - -void -lnet_acceptor_stop(void) -{ -} -#endif /* !HAVE_LIBPTHREAD */ -#endif /* !__KERNEL__ */ diff --git a/lnet/lnet/api-errno.c b/lnet/lnet/api-errno.c deleted file mode 100644 index a158d6ea7e4186e8007de5b333b27cb22997b762..0000000000000000000000000000000000000000 --- a/lnet/lnet/api-errno.c +++ /dev/null @@ -1,11 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-errno.c - * Instantiate the string table of errors - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * This file is not subject to copyright protection. - */ - -/* If you change these, you must update the number table in portals/errno.h */ diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c deleted file mode 100644 index 8a86dcc72fe7bdff691a1f589a56fc778131248e..0000000000000000000000000000000000000000 --- a/lnet/lnet/api-ni.c +++ /dev/null @@ -1,1759 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -#ifdef __KERNEL__ -#define D_LNI D_CONSOLE -#else -#define D_LNI D_CONFIG -#endif - -lnet_t the_lnet; /* THE state of the network */ - -#ifdef __KERNEL__ - -static char *ip2nets = ""; -CFS_MODULE_PARM(ip2nets, "s", charp, 0444, - "LNET network <- IP table"); - -static char *networks = ""; -CFS_MODULE_PARM(networks, "s", charp, 0444, - "local networks"); - -static char *routes = ""; -CFS_MODULE_PARM(routes, "s", charp, 0444, - "routes to non-local networks"); - -static char *portals_compatibility = "none"; -CFS_MODULE_PARM(portals_compatibility, "s", charp, 0444, - "wire protocol compatibility: 'strong'|'weak'|'none'"); - -char * -lnet_get_routes(void) -{ - return routes; -} - -char * -lnet_get_networks(void) -{ - char *nets; - int rc; - - if (*networks != 0 && *ip2nets != 0) { - LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or " - "'ip2nets' but not both at once\n"); - return NULL; - } - - if (*ip2nets != 0) { - rc = lnet_parse_ip2nets(&nets, ip2nets); - return (rc == 0) ? 
nets : NULL; - } - - if (*networks != 0) - return networks; - - return "tcp"; -} - -int -lnet_get_portals_compatibility(void) -{ - if (!strcmp(portals_compatibility, "none")) { - return 0; - } - - if (!strcmp(portals_compatibility, "weak")) { - return 1; - LCONSOLE_WARN("Starting in weak portals-compatible mode\n"); - } - - if (!strcmp(portals_compatibility, "strong")) { - return 2; - LCONSOLE_WARN("Starting in strong portals-compatible mode\n"); - } - - LCONSOLE_ERROR_MSG(0x102, "portals_compatibility=\"%s\" not supported\n", - portals_compatibility); - return -EINVAL; -} - -void -lnet_init_locks(void) -{ - spin_lock_init (&the_lnet.ln_lock); - cfs_waitq_init (&the_lnet.ln_waitq); - init_mutex(&the_lnet.ln_lnd_mutex); - init_mutex(&the_lnet.ln_api_mutex); -} - -void -lnet_fini_locks(void) -{ -} - -#else - -char * -lnet_get_routes(void) -{ - char *str = getenv("LNET_ROUTES"); - - return (str == NULL) ? "" : str; -} - -char * -lnet_get_networks (void) -{ - static char default_networks[256]; - char *networks = getenv ("LNET_NETWORKS"); - char *ip2nets = getenv ("LNET_IP2NETS"); - char *str; - char *sep; - int len; - int nob; - int rc; - struct list_head *tmp; - -#ifdef NOT_YET - if (networks != NULL && ip2nets != NULL) { - LCONSOLE_ERROR_MSG(0x103, "Please set EITHER 'LNET_NETWORKS' or" - " 'LNET_IP2NETS' but not both at once\n"); - return NULL; - } - - if (ip2nets != NULL) { - rc = lnet_parse_ip2nets(&networks, ip2nets); - return (rc == 0) ? 
networks : NULL; - } -#else - ip2nets = NULL; - rc = 0; -#endif - if (networks != NULL) - return networks; - - /* In userland, the default 'networks=' is the list of known net types */ - - len = sizeof(default_networks); - str = default_networks; - *str = 0; - sep = ""; - - list_for_each (tmp, &the_lnet.ln_lnds) { - lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list); - - nob = snprintf(str, len, "%s%s", sep, - libcfs_lnd2str(lnd->lnd_type)); - len -= nob; - if (len < 0) { - /* overflowed the string; leave it where it was */ - *str = 0; - break; - } - - str += nob; - sep = ","; - } - - return default_networks; -} - -int -lnet_get_portals_compatibility(void) -{ - return 0; -} - -# ifndef HAVE_LIBPTHREAD - -void lnet_init_locks(void) -{ - the_lnet.ln_lock = 0; - the_lnet.ln_lnd_mutex = 0; - the_lnet.ln_api_mutex = 0; -} - -void lnet_fini_locks(void) -{ - LASSERT (the_lnet.ln_api_mutex == 0); - LASSERT (the_lnet.ln_lnd_mutex == 0); - LASSERT (the_lnet.ln_lock == 0); -} - -# else - -void lnet_init_locks(void) -{ - pthread_cond_init(&the_lnet.ln_cond, NULL); - pthread_mutex_init(&the_lnet.ln_lock, NULL); - pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL); - pthread_mutex_init(&the_lnet.ln_api_mutex, NULL); -} - -void lnet_fini_locks(void) -{ - pthread_mutex_destroy(&the_lnet.ln_api_mutex); - pthread_mutex_destroy(&the_lnet.ln_lnd_mutex); - pthread_mutex_destroy(&the_lnet.ln_lock); - pthread_cond_destroy(&the_lnet.ln_cond); -} - -# endif -#endif - -void lnet_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux robert.bartonsoftware.com 2.6.8-1.521 - * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux - * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */ - - /* Constants... 
*/ - CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded); - CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1); - CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0); - CLASSERT (LNET_MSG_ACK == 0); - CLASSERT (LNET_MSG_PUT == 1); - CLASSERT (LNET_MSG_GET == 2); - CLASSERT (LNET_MSG_REPLY == 3); - CLASSERT (LNET_MSG_HELLO == 4); - - /* Checks for struct ptl_handle_wire_t */ - CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16); - CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0); - CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8); - CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8); - CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8); - - /* Checks for struct lnet_magicversion_t */ - CLASSERT ((int)sizeof(lnet_magicversion_t) == 8); - CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0); - CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4); - CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4); - CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2); - CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6); - CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2); - - /* Checks for struct lnet_hdr_t */ - CLASSERT ((int)sizeof(lnet_hdr_t) == 72); - CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4); 
- CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40); - - /* Ack */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4); - - /* Put */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4); - - /* Get */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4); - - /* Reply */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32); - CLASSERT 
((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16); - - /* Hello */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4); -} - -lnd_t * -lnet_find_lnd_by_type (int type) -{ - lnd_t *lnd; - struct list_head *tmp; - - /* holding lnd mutex */ - list_for_each (tmp, &the_lnet.ln_lnds) { - lnd = list_entry(tmp, lnd_t, lnd_list); - - if (lnd->lnd_type == type) - return lnd; - } - - return NULL; -} - -void -lnet_register_lnd (lnd_t *lnd) -{ - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - - LASSERT (the_lnet.ln_init); - LASSERT (libcfs_isknown_lnd(lnd->lnd_type)); - LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL); - - list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds); - lnd->lnd_refcount = 0; - - CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type)); - - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); -} - -void -lnet_unregister_lnd (lnd_t *lnd) -{ - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - - LASSERT (the_lnet.ln_init); - LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd); - LASSERT (lnd->lnd_refcount == 0); - - list_del (&lnd->lnd_list); - CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type)); - - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); -} - -#ifndef LNET_USE_LIB_FREELIST - -int -lnet_descriptor_setup (void) -{ - return 0; -} - -void -lnet_descriptor_cleanup (void) -{ -} - -#else - -int -lnet_freelist_init (lnet_freelist_t *fl, int n, int size) -{ - char *space; - - LASSERT (n > 0); - - size += offsetof (lnet_freeobj_t, fo_contents); - - LIBCFS_ALLOC(space, n * size); - if (space == NULL) - return (-ENOMEM); - - CFS_INIT_LIST_HEAD (&fl->fl_list); - fl->fl_objs = space; - fl->fl_nobjs = n; - fl->fl_objsize = size; - - do - { - memset (space, 0, size); - list_add ((struct list_head *)space, &fl->fl_list); - space += size; - } 
while (--n != 0); - - return (0); -} - -void -lnet_freelist_fini (lnet_freelist_t *fl) -{ - struct list_head *el; - int count; - - if (fl->fl_nobjs == 0) - return; - - count = 0; - for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) - count++; - - LASSERT (count == fl->fl_nobjs); - - LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); - memset (fl, 0, sizeof (fl)); -} - -int -lnet_descriptor_setup (void) -{ - /* NB on failure caller must still call lnet_descriptor_cleanup */ - /* ****** */ - int rc; - - memset (&the_lnet.ln_free_mes, 0, sizeof (the_lnet.ln_free_mes)); - memset (&the_lnet.ln_free_msgs, 0, sizeof (the_lnet.ln_free_msgs)); - memset (&the_lnet.ln_free_mds, 0, sizeof (the_lnet.ln_free_mds)); - memset (&the_lnet.ln_free_eqs, 0, sizeof (the_lnet.ln_free_eqs)); - - rc = lnet_freelist_init(&the_lnet.ln_free_mes, - MAX_MES, sizeof (lnet_me_t)); - if (rc != 0) - return (rc); - - rc = lnet_freelist_init(&the_lnet.ln_free_msgs, - MAX_MSGS, sizeof (lnet_msg_t)); - if (rc != 0) - return (rc); - - rc = lnet_freelist_init(&the_lnet.ln_free_mds, - MAX_MDS, sizeof (lnet_libmd_t)); - if (rc != 0) - return (rc); - - rc = lnet_freelist_init(&the_lnet.ln_free_eqs, - MAX_EQS, sizeof (lnet_eq_t)); - return (rc); -} - -void -lnet_descriptor_cleanup (void) -{ - lnet_freelist_fini (&the_lnet.ln_free_mes); - lnet_freelist_fini (&the_lnet.ln_free_msgs); - lnet_freelist_fini (&the_lnet.ln_free_mds); - lnet_freelist_fini (&the_lnet.ln_free_eqs); -} - -#endif - -__u64 -lnet_create_interface_cookie (void) -{ - /* NB the interface cookie in wire handles guards against delayed - * replies and ACKs appearing valid after reboot. Initialisation time, - * even if it's only implemented to millisecond resolution is probably - * easily good enough. 
*/ - struct timeval tv; - __u64 cookie; -#ifndef __KERNEL__ - int rc = gettimeofday (&tv, NULL); - LASSERT (rc == 0); -#else - do_gettimeofday(&tv); -#endif - cookie = tv.tv_sec; - cookie *= 1000000; - cookie += tv.tv_usec; - return cookie; -} - -int -lnet_setup_handle_hash (void) -{ - int i; - - /* Arbitrary choice of hash table size */ -#ifdef __KERNEL__ - the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head); -#else - the_lnet.ln_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4; -#endif - LIBCFS_ALLOC(the_lnet.ln_lh_hash_table, - the_lnet.ln_lh_hash_size * sizeof (struct list_head)); - if (the_lnet.ln_lh_hash_table == NULL) - return (-ENOMEM); - - for (i = 0; i < the_lnet.ln_lh_hash_size; i++) - CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]); - - the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES; - - return (0); -} - -void -lnet_cleanup_handle_hash (void) -{ - if (the_lnet.ln_lh_hash_table == NULL) - return; - - LIBCFS_FREE(the_lnet.ln_lh_hash_table, - the_lnet.ln_lh_hash_size * sizeof (struct list_head)); -} - -lnet_libhandle_t * -lnet_lookup_cookie (__u64 cookie, int type) -{ - /* ALWAYS called with LNET_LOCK held */ - struct list_head *list; - struct list_head *el; - unsigned int hash; - - if ((cookie & (LNET_COOKIE_TYPES - 1)) != type) - return (NULL); - - hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size; - list = &the_lnet.ln_lh_hash_table[hash]; - - list_for_each (el, list) { - lnet_libhandle_t *lh = list_entry (el, lnet_libhandle_t, - lh_hash_chain); - - if (lh->lh_cookie == cookie) - return (lh); - } - - return (NULL); -} - -void -lnet_initialise_handle (lnet_libhandle_t *lh, int type) -{ - /* ALWAYS called with LNET_LOCK held */ - unsigned int hash; - - LASSERT (type >= 0 && type < LNET_COOKIE_TYPES); - lh->lh_cookie = the_lnet.ln_next_object_cookie | type; - the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES; - - hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size; - list_add (&lh->lh_hash_chain, 
&the_lnet.ln_lh_hash_table[hash]); -} - -void -lnet_invalidate_handle (lnet_libhandle_t *lh) -{ - /* ALWAYS called with LNET_LOCK held */ - list_del (&lh->lh_hash_chain); -} - -int -lnet_init_finalizers(void) -{ -#ifdef __KERNEL__ - int i; - - the_lnet.ln_nfinalizers = num_online_cpus(); - - LIBCFS_ALLOC(the_lnet.ln_finalizers, - the_lnet.ln_nfinalizers * - sizeof(*the_lnet.ln_finalizers)); - if (the_lnet.ln_finalizers == NULL) { - CERROR("Can't allocate ln_finalizers\n"); - return -ENOMEM; - } - - for (i = 0; i < the_lnet.ln_nfinalizers; i++) - the_lnet.ln_finalizers[i] = NULL; -#else - the_lnet.ln_finalizing = 0; -#endif - - CFS_INIT_LIST_HEAD(&the_lnet.ln_finalizeq); - return 0; -} - -void -lnet_fini_finalizers(void) -{ -#ifdef __KERNEL__ - int i; - - for (i = 0; i < the_lnet.ln_nfinalizers; i++) - LASSERT (the_lnet.ln_finalizers[i] == NULL); - - LIBCFS_FREE(the_lnet.ln_finalizers, - the_lnet.ln_nfinalizers * - sizeof(*the_lnet.ln_finalizers)); -#else - LASSERT (!the_lnet.ln_finalizing); -#endif - LASSERT (list_empty(&the_lnet.ln_finalizeq)); -} - -#ifndef __KERNEL__ -/* Temporary workaround to allow uOSS and test programs force server - * mode in userspace. 
See comments near ln_server_mode_flag in - * lnet/lib-types.h */ - -void -lnet_server_mode() { - the_lnet.ln_server_mode_flag = 1; -} -#endif - -int -lnet_prepare(lnet_pid_t requested_pid) -{ - /* Prepare to bring up the network */ - int rc = 0; - int i; - - LASSERT (the_lnet.ln_refcount == 0); - - the_lnet.ln_routing = 0; - -#ifdef __KERNEL__ - LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0); - the_lnet.ln_pid = requested_pid; -#else - if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */ - LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0); - - if (cfs_curproc_uid())/* Only root can run user-space server */ - return -EPERM; - the_lnet.ln_pid = requested_pid; - - } else {/* client case (liblustre) */ - - /* My PID must be unique on this node and flag I'm userspace */ - the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG; - } -#endif - - rc = lnet_descriptor_setup(); - if (rc != 0) - goto failed0; - - memset(&the_lnet.ln_counters, 0, - sizeof(the_lnet.ln_counters)); - - CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs); - CFS_INIT_LIST_HEAD (&the_lnet.ln_active_mds); - CFS_INIT_LIST_HEAD (&the_lnet.ln_active_eqs); - CFS_INIT_LIST_HEAD (&the_lnet.ln_test_peers); - CFS_INIT_LIST_HEAD (&the_lnet.ln_nis); - CFS_INIT_LIST_HEAD (&the_lnet.ln_zombie_nis); - CFS_INIT_LIST_HEAD (&the_lnet.ln_remote_nets); - CFS_INIT_LIST_HEAD (&the_lnet.ln_routers); - - the_lnet.ln_interface_cookie = lnet_create_interface_cookie(); - - lnet_init_rtrpools(); - - rc = lnet_setup_handle_hash (); - if (rc != 0) - goto failed0; - - rc = lnet_create_peer_table(); - if (rc != 0) - goto failed1; - - rc = lnet_init_finalizers(); - if (rc != 0) - goto failed2; - - the_lnet.ln_nportals = MAX_PORTALS; - LIBCFS_ALLOC(the_lnet.ln_portals, - the_lnet.ln_nportals * - sizeof(*the_lnet.ln_portals)); - if (the_lnet.ln_portals == NULL) { - rc = -ENOMEM; - goto failed3; - } - - for (i = 0; i < the_lnet.ln_nportals; i++) { - CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_ml)); - 
CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_msgq)); - the_lnet.ln_portals[i].ptl_options = 0; - } - - return 0; - - failed3: - lnet_fini_finalizers(); - failed2: - lnet_destroy_peer_table(); - failed1: - lnet_cleanup_handle_hash(); - failed0: - lnet_descriptor_cleanup(); - return rc; -} - -int -lnet_unprepare (void) -{ - int idx; - - /* NB no LNET_LOCK since this is the last reference. All LND instances - * have shut down already, so it is safe to unlink and free all - * descriptors, even those that appear committed to a network op (eg MD - * with non-zero pending count) */ - - lnet_fail_nid(LNET_NID_ANY, 0); - - LASSERT (list_empty(&the_lnet.ln_test_peers)); - LASSERT (the_lnet.ln_refcount == 0); - LASSERT (list_empty(&the_lnet.ln_nis)); - LASSERT (list_empty(&the_lnet.ln_zombie_nis)); - LASSERT (the_lnet.ln_nzombie_nis == 0); - - for (idx = 0; idx < the_lnet.ln_nportals; idx++) { - LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq)); - - while (!list_empty (&the_lnet.ln_portals[idx].ptl_ml)) { - lnet_me_t *me = list_entry (the_lnet.ln_portals[idx].ptl_ml.next, - lnet_me_t, me_list); - - CERROR ("Active me %p on exit\n", me); - list_del (&me->me_list); - lnet_me_free (me); - } - } - - while (!list_empty (&the_lnet.ln_active_mds)) { - lnet_libmd_t *md = list_entry (the_lnet.ln_active_mds.next, - lnet_libmd_t, md_list); - - CERROR ("Active md %p on exit\n", md); - list_del (&md->md_list); - lnet_md_free (md); - } - - while (!list_empty (&the_lnet.ln_active_eqs)) { - lnet_eq_t *eq = list_entry (the_lnet.ln_active_eqs.next, - lnet_eq_t, eq_list); - - CERROR ("Active eq %p on exit\n", eq); - list_del (&eq->eq_list); - lnet_eq_free (eq); - } - - while (!list_empty (&the_lnet.ln_active_msgs)) { - lnet_msg_t *msg = list_entry (the_lnet.ln_active_msgs.next, - lnet_msg_t, msg_activelist); - - CERROR ("Active msg %p on exit\n", msg); - LASSERT (msg->msg_onactivelist); - msg->msg_onactivelist = 0; - list_del (&msg->msg_activelist); - lnet_msg_free (msg); - } - - 
LIBCFS_FREE(the_lnet.ln_portals, - the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals)); - - lnet_free_rtrpools(); - lnet_fini_finalizers(); - lnet_destroy_peer_table(); - lnet_cleanup_handle_hash(); - lnet_descriptor_cleanup(); - - return (0); -} - -lnet_ni_t * -lnet_net2ni_locked (__u32 net) -{ - struct list_head *tmp; - lnet_ni_t *ni; - - list_for_each (tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (lnet_ptlcompat_matchnet(LNET_NIDNET(ni->ni_nid), net)) { - lnet_ni_addref_locked(ni); - return ni; - } - } - - return NULL; -} - -int -lnet_islocalnet (__u32 net) -{ - lnet_ni_t *ni; - - LNET_LOCK(); - ni = lnet_net2ni_locked(net); - if (ni != NULL) - lnet_ni_decref_locked(ni); - LNET_UNLOCK(); - - return ni != NULL; -} - -lnet_ni_t * -lnet_nid2ni_locked (lnet_nid_t nid) -{ - struct list_head *tmp; - lnet_ni_t *ni; - - list_for_each (tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (lnet_ptlcompat_matchnid(ni->ni_nid, nid)) { - lnet_ni_addref_locked(ni); - return ni; - } - } - - return NULL; -} - -int -lnet_islocalnid (lnet_nid_t nid) -{ - lnet_ni_t *ni; - - LNET_LOCK(); - ni = lnet_nid2ni_locked(nid); - if (ni != NULL) - lnet_ni_decref_locked(ni); - LNET_UNLOCK(); - - return ni != NULL; -} - -int -lnet_count_acceptor_nis (lnet_ni_t **first_ni) -{ - /* Return the # of NIs that need the acceptor. Return the first one in - * *first_ni so the acceptor can pass it connections "blind" to retain - * binary compatibility. 
*/ - int count = 0; -#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) - struct list_head *tmp; - lnet_ni_t *ni; - - LNET_LOCK(); - list_for_each (tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (ni->ni_lnd->lnd_accept != NULL) { - /* This LND uses the acceptor */ - if (count == 0 && first_ni != NULL) { - lnet_ni_addref_locked(ni); - *first_ni = ni; - } - count++; - } - } - - LNET_UNLOCK(); - -#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */ - return count; -} - -void -lnet_shutdown_lndnis (void) -{ - int i; - int islo; - lnet_ni_t *ni; - - /* NB called holding the global mutex */ - - /* All quiet on the API front */ - LASSERT (!the_lnet.ln_shutdown); - LASSERT (the_lnet.ln_refcount == 0); - LASSERT (list_empty(&the_lnet.ln_zombie_nis)); - LASSERT (the_lnet.ln_nzombie_nis == 0); - LASSERT (list_empty(&the_lnet.ln_remote_nets)); - - LNET_LOCK(); - the_lnet.ln_shutdown = 1; /* flag shutdown */ - - /* Unlink NIs from the global table */ - while (!list_empty(&the_lnet.ln_nis)) { - ni = list_entry(the_lnet.ln_nis.next, - lnet_ni_t, ni_list); - list_del (&ni->ni_list); - - the_lnet.ln_nzombie_nis++; - lnet_ni_decref_locked(ni); /* drop apini's ref */ - } - - /* Drop the cached eqwait NI. */ - if (the_lnet.ln_eqwaitni != NULL) { - lnet_ni_decref_locked(the_lnet.ln_eqwaitni); - the_lnet.ln_eqwaitni = NULL; - } - - /* Drop the cached loopback NI. 
*/ - if (the_lnet.ln_loni != NULL) { - lnet_ni_decref_locked(the_lnet.ln_loni); - the_lnet.ln_loni = NULL; - } - - LNET_UNLOCK(); - - /* Clear lazy portals and drop delayed messages which hold refs - * on their lnet_msg_t::msg_rxpeer */ - for (i = 0; i < the_lnet.ln_nportals; i++) - LNetClearLazyPortal(i); - - /* Clear the peer table and wait for all peers to go (they hold refs on - * their NIs) */ - lnet_clear_peer_table(); - - LNET_LOCK(); - /* Now wait for the NI's I just nuked to show up on apini_zombie_nis - * and shut them down in guaranteed thread context */ - i = 2; - while (the_lnet.ln_nzombie_nis != 0) { - - while (list_empty(&the_lnet.ln_zombie_nis)) { - LNET_UNLOCK(); - ++i; - if ((i & (-i)) == i) - CDEBUG(D_WARNING,"Waiting for %d zombie NIs\n", - the_lnet.ln_nzombie_nis); - cfs_pause(cfs_time_seconds(1)); - LNET_LOCK(); - } - - ni = list_entry(the_lnet.ln_zombie_nis.next, - lnet_ni_t, ni_list); - list_del(&ni->ni_list); - ni->ni_lnd->lnd_refcount--; - - LNET_UNLOCK(); - - islo = ni->ni_lnd->lnd_type == LOLND; - - LASSERT (!in_interrupt ()); - (ni->ni_lnd->lnd_shutdown)(ni); - - /* can't deref lnd anymore now; it might have unregistered - * itself... 
*/ - - if (!islo) - CDEBUG(D_LNI, "Removed LNI %s\n", - libcfs_nid2str(ni->ni_nid)); - - LIBCFS_FREE(ni, sizeof(*ni)); - - LNET_LOCK(); - the_lnet.ln_nzombie_nis--; - } - - the_lnet.ln_shutdown = 0; - LNET_UNLOCK(); - - if (the_lnet.ln_network_tokens != NULL) { - LIBCFS_FREE(the_lnet.ln_network_tokens, - the_lnet.ln_network_tokens_nob); - the_lnet.ln_network_tokens = NULL; - } -} - -int -lnet_startup_lndnis (void) -{ - lnd_t *lnd; - lnet_ni_t *ni; - struct list_head nilist; - int rc = 0; - int lnd_type; - int nicount = 0; - char *nets = lnet_get_networks(); - - CFS_INIT_LIST_HEAD(&nilist); - - if (nets == NULL) - goto failed; - - rc = lnet_parse_networks(&nilist, nets); - if (rc != 0) - goto failed; - - while (!list_empty(&nilist)) { - ni = list_entry(nilist.next, lnet_ni_t, ni_list); - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - - LASSERT (libcfs_isknown_lnd(lnd_type)); - - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - lnd = lnet_find_lnd_by_type(lnd_type); - -#ifdef __KERNEL__ - if (lnd == NULL) { - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - rc = request_module(libcfs_lnd2modname(lnd_type)); - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - - lnd = lnet_find_lnd_by_type(lnd_type); - if (lnd == NULL) { - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - CERROR("Can't load LND %s, module %s, rc=%d\n", - libcfs_lnd2str(lnd_type), - libcfs_lnd2modname(lnd_type), rc); -#ifndef CONFIG_KMOD - LCONSOLE_ERROR_MSG(0x104, "Your kernel must be " - "compiled with CONFIG_KMOD set for " - "automatic module loading."); -#endif - goto failed; - } - } -#else - if (lnd == NULL) { - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - CERROR("LND %s not supported\n", - libcfs_lnd2str(lnd_type)); - goto failed; - } -#endif - - ni->ni_refcount = 1; - - LNET_LOCK(); - lnd->lnd_refcount++; - LNET_UNLOCK(); - - ni->ni_lnd = lnd; - - rc = (lnd->lnd_startup)(ni); - - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - - if (rc != 0) { - LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s" - "\n", - rc, 
libcfs_lnd2str(lnd->lnd_type)); - LNET_LOCK(); - lnd->lnd_refcount--; - LNET_UNLOCK(); - goto failed; - } - - list_del(&ni->ni_list); - - LNET_LOCK(); - list_add_tail(&ni->ni_list, &the_lnet.ln_nis); - LNET_UNLOCK(); - - if (lnd->lnd_type == LOLND) { - lnet_ni_addref(ni); - LASSERT (the_lnet.ln_loni == NULL); - the_lnet.ln_loni = ni; - continue; - } - -#ifndef __KERNEL__ - if (lnd->lnd_wait != NULL) { - if (the_lnet.ln_eqwaitni == NULL) { - lnet_ni_addref(ni); - the_lnet.ln_eqwaitni = ni; - } - } else { -# ifndef HAVE_LIBPTHREAD - LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a " - "single-threaded runtime\n", - libcfs_lnd2str(lnd_type)); - goto failed; -# endif - } -#endif - if (ni->ni_peertxcredits == 0 || - ni->ni_maxtxcredits == 0) { - LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", - libcfs_lnd2str(lnd->lnd_type), - ni->ni_peertxcredits == 0 ? - "" : "per-peer "); - goto failed; - } - - ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits; - - CDEBUG(D_LNI, "Added LNI %s [%d/%d]\n", - libcfs_nid2str(ni->ni_nid), - ni->ni_peertxcredits, ni->ni_txcredits); - - /* Handle nidstrings for network 0 just like this one */ - if (the_lnet.ln_ptlcompat > 0) { - if (nicount > 0) { - LCONSOLE_ERROR_MSG(0x108, "Can't run > 1 " - "network when portals_compatibility is " - "set\n"); - goto failed; - } - libcfs_setnet0alias(lnd->lnd_type); - } - - nicount++; - } - - if (the_lnet.ln_eqwaitni != NULL && nicount > 1) { - lnd_type = the_lnet.ln_eqwaitni->ni_lnd->lnd_type; - LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network" - "\n", - libcfs_lnd2str(lnd_type)); - goto failed; - } - - return 0; - - failed: - lnet_shutdown_lndnis(); - - while (!list_empty(&nilist)) { - ni = list_entry(nilist.next, lnet_ni_t, ni_list); - list_del(&ni->ni_list); - LIBCFS_FREE(ni, sizeof(*ni)); - } - - return -ENETDOWN; -} - -int -LNetInit(void) -{ - int rc; - - lnet_assert_wire_constants (); - LASSERT (!the_lnet.ln_init); - - memset(&the_lnet, 0, sizeof(the_lnet)); - - 
rc = lnet_get_portals_compatibility(); - if (rc < 0) - return rc; - - lnet_init_locks(); - CFS_INIT_LIST_HEAD(&the_lnet.ln_lnds); - the_lnet.ln_ptlcompat = rc; - the_lnet.ln_refcount = 0; - the_lnet.ln_init = 1; - -#ifdef __KERNEL__ - /* All LNDs apart from the LOLND are in separate modules. They - * register themselves when their module loads, and unregister - * themselves when their module is unloaded. */ -#else - /* Register LNDs - * NB the order here determines default 'networks=' order */ -# ifdef CRAY_XT3 - LNET_REGISTER_ULND(the_ptllnd); -# endif -# ifdef HAVE_LIBPTHREAD - LNET_REGISTER_ULND(the_tcplnd); -# endif -#endif - lnet_register_lnd(&the_lolnd); - return 0; -} - -void -LNetFini(void) -{ - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount == 0); - - while (!list_empty(&the_lnet.ln_lnds)) - lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next, - lnd_t, lnd_list)); - lnet_fini_locks(); - - the_lnet.ln_init = 0; -} - -int -LNetNIInit(lnet_pid_t requested_pid) -{ - int im_a_router = 0; - int rc; - - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - - LASSERT (the_lnet.ln_init); - CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount); - - if (the_lnet.ln_refcount > 0) { - rc = the_lnet.ln_refcount++; - goto out; - } - - if (requested_pid == LNET_PID_ANY) { - /* Don't instantiate LNET just for me */ - rc = -ENETDOWN; - goto failed0; - } - - rc = lnet_prepare(requested_pid); - if (rc != 0) - goto failed0; - - rc = lnet_startup_lndnis(); - if (rc != 0) - goto failed1; - - rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); - if (rc != 0) - goto failed2; - - rc = lnet_check_routes(); - if (rc != 0) - goto failed2; - - rc = lnet_alloc_rtrpools(im_a_router); - if (rc != 0) - goto failed2; - - rc = lnet_acceptor_start(); - if (rc != 0) - goto failed2; - - the_lnet.ln_refcount = 1; - /* Now I may use my own API functions... 
*/ - - rc = lnet_router_checker_start(); - if (rc != 0) - goto failed3; - - rc = lnet_ping_target_init(); - if (rc != 0) - goto failed4; - - lnet_proc_init(); - goto out; - - failed4: - lnet_router_checker_stop(); - failed3: - the_lnet.ln_refcount = 0; - lnet_acceptor_stop(); - failed2: - lnet_destroy_routes(); - lnet_shutdown_lndnis(); - failed1: - lnet_unprepare(); - failed0: - LASSERT (rc < 0); - out: - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - return rc; -} - -int -LNetNIFini() -{ - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (the_lnet.ln_refcount != 1) { - the_lnet.ln_refcount--; - } else { - LASSERT (!the_lnet.ln_niinit_self); - - lnet_proc_fini(); - lnet_ping_target_fini(); - lnet_router_checker_stop(); - - /* Teardown fns that use my own API functions BEFORE here */ - the_lnet.ln_refcount = 0; - - lnet_acceptor_stop(); - lnet_destroy_routes(); - lnet_shutdown_lndnis(); - lnet_unprepare(); - } - - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - return 0; -} - -int -LNetCtl(unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - lnet_process_id_t id; - lnet_ni_t *ni; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - switch (cmd) { - case IOC_LIBCFS_GET_NI: - rc = LNetGetId(data->ioc_count, &id); - data->ioc_nid = id.nid; - return rc; - - case IOC_LIBCFS_FAIL_NID: - return lnet_fail_nid(data->ioc_nid, data->ioc_count); - - case IOC_LIBCFS_ADD_ROUTE: - rc = lnet_add_route(data->ioc_net, data->ioc_count, - data->ioc_nid); - return (rc != 0) ? 
rc : lnet_check_routes(); - - case IOC_LIBCFS_DEL_ROUTE: - return lnet_del_route(data->ioc_net, data->ioc_nid); - - case IOC_LIBCFS_GET_ROUTE: - return lnet_get_route(data->ioc_count, - &data->ioc_net, &data->ioc_count, - &data->ioc_nid, &data->ioc_flags); - case IOC_LIBCFS_NOTIFY_ROUTER: - return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, - (time_t)data->ioc_u64[0]); - - case IOC_LIBCFS_PORTALS_COMPATIBILITY: - return the_lnet.ln_ptlcompat; - - case IOC_LIBCFS_LNET_DIST: - rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]); - if (rc < 0 && rc != -EHOSTUNREACH) - return rc; - - data->ioc_u32[0] = rc; - return 0; - - case IOC_LIBCFS_TESTPROTOCOMPAT: - LNET_LOCK(); - the_lnet.ln_testprotocompat = data->ioc_flags; - LNET_UNLOCK(); - return 0; - - case IOC_LIBCFS_PING: - rc = lnet_ping((lnet_process_id_t) {.nid = data->ioc_nid, - .pid = data->ioc_u32[0]}, - data->ioc_u32[1], /* timeout */ - (lnet_process_id_t *)data->ioc_pbuf1, - data->ioc_plen1/sizeof(lnet_process_id_t)); - if (rc < 0) - return rc; - data->ioc_count = rc; - return 0; - - case IOC_LIBCFS_DEBUG_PEER: { - /* CAVEAT EMPTOR: this one designed for calling directly; not - * via an ioctl */ - lnet_process_id_t *id = arg; - - lnet_debug_peer(id->nid); - - ni = lnet_net2ni(LNET_NIDNET(id->nid)); - if (ni == NULL) { - CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(*id)); - } else { - if (ni->ni_lnd->lnd_ctl == NULL) { - CDEBUG(D_WARNING, "No ctl for %s\n", - libcfs_id2str(*id)); - } else { - (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg); - } - - lnet_ni_decref(ni); - } - return 0; - } - - default: - ni = lnet_net2ni(data->ioc_net); - if (ni == NULL) - return -EINVAL; - - if (ni->ni_lnd->lnd_ctl == NULL) - rc = -EINVAL; - else - rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg); - - lnet_ni_decref(ni); - return rc; - } - /* not reached */ -} - -int -LNetGetId(unsigned int index, lnet_process_id_t *id) -{ - lnet_ni_t *ni; - struct list_head *tmp; - int rc = -ENOENT; - - LASSERT (the_lnet.ln_init); - LASSERT 
(the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - list_for_each(tmp, &the_lnet.ln_nis) { - if (index-- != 0) - continue; - - ni = list_entry(tmp, lnet_ni_t, ni_list); - - id->nid = ni->ni_nid; - id->pid = the_lnet.ln_pid; - rc = 0; - break; - } - - LNET_UNLOCK(); - - return rc; -} - -void -LNetSnprintHandle(char *str, int len, lnet_handle_any_t h) -{ - snprintf(str, len, LPX64, h.cookie); -} - - -int -lnet_ping_target_init(void) -{ - lnet_handle_me_t meh; - lnet_process_id_t id; - int rc; - int rc2; - int n; - int infosz; - int i; - - for (n = 0; ; n++) { - rc = LNetGetId(n, &id); - if (rc == -ENOENT) - break; - - LASSERT (rc == 0); - } - - infosz = offsetof(lnet_ping_info_t, pi_nid[n]); - LIBCFS_ALLOC(the_lnet.ln_ping_info, infosz); - if (the_lnet.ln_ping_info == NULL) { - CERROR("Can't allocate ping info[%d]\n", n); - return -ENOMEM; - } - - the_lnet.ln_ping_info->pi_magic = LNET_PROTO_PING_MAGIC; - the_lnet.ln_ping_info->pi_version = LNET_PROTO_PING_VERSION; - the_lnet.ln_ping_info->pi_pid = the_lnet.ln_pid; - the_lnet.ln_ping_info->pi_nnids = n; - - for (i = 0; i < n; i++) { - rc = LNetGetId(i, &id); - LASSERT (rc == 0); - the_lnet.ln_ping_info->pi_nid[i] = id.nid; - } - - /* We can have a tiny EQ since we only need to see the unlink event on - * teardown, which by definition is the last one! 
*/ - rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq); - if (rc != 0) { - CERROR("Can't allocate ping EQ: %d\n", rc); - goto failed_0; - } - - rc = LNetMEAttach(LNET_RESERVED_PORTAL, - (lnet_process_id_t){.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}, - LNET_PROTO_PING_MATCHBITS, 0LL, - LNET_UNLINK, LNET_INS_AFTER, - &meh); - if (rc != 0) { - CERROR("Can't create ping ME: %d\n", rc); - goto failed_1; - } - - rc = LNetMDAttach(meh, - (lnet_md_t){.start = the_lnet.ln_ping_info, - .length = infosz, - .threshold = LNET_MD_THRESH_INF, - .options = (LNET_MD_OP_GET | - LNET_MD_TRUNCATE | - LNET_MD_MANAGE_REMOTE), - .eq_handle = the_lnet.ln_ping_target_eq}, - LNET_RETAIN, - &the_lnet.ln_ping_target_md); - if (rc != 0) { - CERROR("Can't attach ping MD: %d\n", rc); - goto failed_2; - } - - return 0; - - failed_2: - rc2 = LNetMEUnlink(meh); - LASSERT (rc2 == 0); - failed_1: - rc2 = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT (rc2 == 0); - failed_0: - LIBCFS_FREE(the_lnet.ln_ping_info, infosz); - - return rc; -} - -void -lnet_ping_target_fini(void) -{ - lnet_event_t event; - int rc; - int which; - int timeout_ms = 1000; - cfs_sigset_t blocked = cfs_block_allsigs(); - - LNetMDUnlink(the_lnet.ln_ping_target_md); - /* NB md could be busy; this just starts the unlink */ - - for (;;) { - rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1, - timeout_ms, &event, &which); - - /* I expect overflow... 
*/ - LASSERT (rc >= 0 || rc == -EOVERFLOW); - - if (rc == 0) { - /* timed out: provide a diagnostic */ - CWARN("Still waiting for ping MD to unlink\n"); - timeout_ms *= 2; - continue; - } - - /* Got a valid event */ - if (event.unlinked) - break; - } - - rc = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT (rc == 0); - - LIBCFS_FREE(the_lnet.ln_ping_info, - offsetof(lnet_ping_info_t, - pi_nid[the_lnet.ln_ping_info->pi_nnids])); - - cfs_restore_sigs(blocked); -} - -int -lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids) -{ - lnet_handle_eq_t eqh; - lnet_handle_md_t mdh; - lnet_event_t event; - int which; - int unlinked = 0; - int replied = 0; - const int a_long_time = 60000; /* mS */ - int infosz = offsetof(lnet_ping_info_t, pi_nid[n_ids]); - lnet_ping_info_t *info; - lnet_process_id_t tmpid; - int i; - int nob; - int rc; - int rc2; - cfs_sigset_t blocked; - - if (n_ids <= 0 || - id.nid == LNET_NID_ANY || - timeout_ms > 500000 || /* arbitrary limit! */ - n_ids > 20) /* arbitrary limit! */ - return -EINVAL; - - if (id.pid == LNET_PID_ANY) - id.pid = LUSTRE_SRV_LNET_PID; - - LIBCFS_ALLOC(info, infosz); - if (info == NULL) - return -ENOMEM; - - /* NB 2 events max (including any unlink event) */ - rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh); - if (rc != 0) { - CERROR("Can't allocate EQ: %d\n", rc); - goto out_0; - } - - rc = LNetMDBind((lnet_md_t){.start = info, - .length = infosz, - .threshold = 2, /* GET/REPLY */ - .options = LNET_MD_TRUNCATE, - .eq_handle = eqh}, - LNET_UNLINK, - &mdh); - if (rc != 0) { - CERROR("Can't bind MD: %d\n", rc); - goto out_1; - } - - rc = LNetGet(LNET_NID_ANY, mdh, id, - LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - - if (rc != 0) { - /* Don't CERROR; this could be deliberate! */ - - rc2 = LNetMDUnlink(mdh); - LASSERT (rc2 == 0); - - /* NB must wait for the UNLINK event below... 
*/ - unlinked = 1; - timeout_ms = a_long_time; - } - - do { - /* MUST block for unlink to complete */ - if (unlinked) - blocked = cfs_block_allsigs(); - - rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which); - - if (unlinked) - cfs_restore_sigs(blocked); - - CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2, - (rc2 <= 0) ? -1 : event.type, - (rc2 <= 0) ? -1 : event.status, - (rc2 > 0 && event.unlinked) ? " unlinked" : ""); - - LASSERT (rc2 != -EOVERFLOW); /* can't miss anything */ - - if (rc2 <= 0 || event.status != 0) { - /* timeout or error */ - if (!replied && rc == 0) - rc = (rc2 < 0) ? rc2 : - (rc2 == 0) ? -ETIMEDOUT : - event.status; - - if (!unlinked) { - /* Ensure completion in finite time... */ - LNetMDUnlink(mdh); - /* No assertion (racing with network) */ - unlinked = 1; - timeout_ms = a_long_time; - } else if (rc2 == 0) { - /* timed out waiting for unlink */ - CWARN("ping %s: late network completion\n", - libcfs_id2str(id)); - } - - } else if (event.type == LNET_EVENT_REPLY) { - replied = 1; - rc = event.mlength; - } - - } while (rc2 <= 0 || !event.unlinked); - - if (!replied) { - if (rc >= 0) - CWARN("%s: Unexpected rc >= 0 but no reply!\n", - libcfs_id2str(id)); - rc = -EIO; - goto out_1; - } - - nob = rc; - LASSERT (nob >= 0 && nob <= infosz); - - rc = -EPROTO; /* if I can't parse... 
*/ - - if (nob < 8) { - /* can't check magic/version */ - CERROR("%s: ping info too short %d\n", - libcfs_id2str(id), nob); - goto out_1; - } - - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) { - /* NB I might be swabbing garbage until I check below, but it - * doesn't matter */ - __swab32s(&info->pi_version); - __swab32s(&info->pi_pid); - __swab32s(&info->pi_nnids); - for (i = 0; i < info->pi_nnids && i < n_ids; i++) - __swab64s(&info->pi_nid[i]); - - } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) { - CERROR("%s: Unexpected magic %08x\n", - libcfs_id2str(id), info->pi_magic); - goto out_1; - } - - if (info->pi_version != LNET_PROTO_PING_VERSION) { - CERROR("%s: Unexpected version 0x%x\n", - libcfs_id2str(id), info->pi_version); - goto out_1; - } - - if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) { - CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), - nob, (int)offsetof(lnet_ping_info_t, pi_nid[0])); - goto out_1; - } - - if (info->pi_nnids < n_ids) - n_ids = info->pi_nnids; - - if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) { - CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), - nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids])); - goto out_1; - } - - rc = -EFAULT; /* If I SEGV... 
*/ - - for (i = 0; i < n_ids; i++) { - tmpid.pid = info->pi_pid; - tmpid.nid = info->pi_nid[i]; -#ifdef __KERNEL__ - if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) - goto out_1; -#else - ids[i] = tmpid; -#endif - } - rc = info->pi_nnids; - - out_1: - rc2 = LNetEQFree(eqh); - if (rc2 != 0) - CERROR("rc2 %d\n", rc2); - LASSERT (rc2 == 0); - - out_0: - LIBCFS_FREE(info, infosz); - return rc; -} diff --git a/lnet/lnet/autoMakefile.am b/lnet/lnet/autoMakefile.am deleted file mode 100644 index 9ce40fe8434d88383299eebe94855be3a122e5c8..0000000000000000000000000000000000000000 --- a/lnet/lnet/autoMakefile.am +++ /dev/null @@ -1,46 +0,0 @@ -my_sources = api-errno.c api-ni.c config.c \ - lib-me.c lib-msg.c lib-eq.c \ - lib-md.c lib-move.c lo.c \ - router.c router_proc.c \ - acceptor.c peer.c - - -if LIBLUSTRE -noinst_LIBRARIES= liblnet.a -liblnet_a_SOURCES= $(my_sources) -liblnet_a_CPPFLAGS = $(LLCPPFLAGS) -liblnet_a_CFLAGS = $(LLCFLAGS) -endif - -if MODULES - -if LINUX -modulenet_DATA = lnet$(KMODEXT) -endif # LINUX - -if DARWIN -macos_PROGRAMS := lnet - -lnet_SOURCES := api-errno.c api-ni.c config.c -lnet_SOURCES += lib-me.c lib-msg.c lib-eq.c lib-md.c -lnet_SOURCES += lib-move.c module.c lo.c router.c router_proc.c -lnet_SOURCES += acceptor.c peer.c - -lnet_CFLAGS := $(EXTRA_KCFLAGS) -lnet_LDFLAGS := $(EXTRA_KLDFLAGS) -lnet_LDADD := $(EXTRA_KLIBS) - -plist_DATA := Info.plist - -install_data_hook := fix-kext-ownership - -endif # DARWIN - -endif # MODULES - -install-data-hook: $(install_data_hook) - -EXTRA_DIST := Info.plist - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ lnet -DIST_SOURCES = $(lnet-objs:%.o=%.c) diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c deleted file mode 100644 index 7425877cfa9d42b6316fdf9e38404d37157b125a..0000000000000000000000000000000000000000 --- a/lnet/lnet/config.c +++ /dev/null @@ -1,1389 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File 
Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -typedef struct { /* tmp struct for parsing routes */ - struct list_head ltb_list; /* stash on lists */ - int ltb_size; /* allocated size */ - char ltb_text[0]; /* text buffer */ -} lnet_text_buf_t; - -static int lnet_tbnob = 0; /* track text buf allocation */ -#define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */ -#define LNET_SINGLE_TEXTBUF_NOB (4<<10) - -typedef struct { - struct list_head lre_list; /* stash in a list */ - int lre_min; /* min value */ - int lre_max; /* max value */ - int lre_stride; /* stride */ -} lnet_range_expr_t; - -static int lnet_re_alloc = 0; /* track expr allocation */ - -void -lnet_syntax(char *name, char *str, int offset, int width) -{ - static char dots[LNET_SINGLE_TEXTBUF_NOB]; - static char dashes[LNET_SINGLE_TEXTBUF_NOB]; - - memset(dots, '.', sizeof(dots)); - dots[sizeof(dots)-1] = 0; - memset(dashes, '-', sizeof(dashes)); - dashes[sizeof(dashes)-1] = 0; - - LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str); - LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n", - (int)strlen(name), dots, offset, dots, - (width < 1) ? 
0 : width - 1, dashes); -} - -int -lnet_issep (char c) -{ - switch (c) { - case '\n': - case '\r': - case ';': - return 1; - default: - return 0; - } -} - -int -lnet_iswhite (char c) -{ - switch (c) { - case ' ': - case '\t': - case '\n': - case '\r': - return 1; - default: - return 0; - } -} - -char * -lnet_trimwhite(char *str) -{ - char *end; - - while (lnet_iswhite(*str)) - str++; - - end = str + strlen(str); - while (end > str) { - if (!lnet_iswhite(end[-1])) - break; - end--; - } - - *end = 0; - return str; -} - -int -lnet_net_unique(__u32 net, struct list_head *nilist) -{ - struct list_head *tmp; - lnet_ni_t *ni; - - list_for_each (tmp, nilist) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (LNET_NIDNET(ni->ni_nid) == net) - return 0; - } - - return 1; -} - -lnet_ni_t * -lnet_new_ni(__u32 net, struct list_head *nilist) -{ - lnet_ni_t *ni; - - if (!lnet_net_unique(net, nilist)) { - LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n", - libcfs_net2str(net)); - return NULL; - } - - LIBCFS_ALLOC(ni, sizeof(*ni)); - if (ni == NULL) { - CERROR("Out of memory creating network %s\n", - libcfs_net2str(net)); - return NULL; - } - - /* zero counters/flags, NULL pointers... 
*/ - memset(ni, 0, sizeof(*ni)); - - /* LND will fill in the address part of the NID */ - ni->ni_nid = LNET_MKNID(net, 0); - CFS_INIT_LIST_HEAD(&ni->ni_txq); - - list_add_tail(&ni->ni_list, nilist); - return ni; -} - -int -lnet_parse_networks(struct list_head *nilist, char *networks) -{ - int tokensize = strlen(networks) + 1; - char *tokens; - char *str; - lnet_ni_t *ni; - __u32 net; - int nnets = 0; - - if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) { - /* _WAY_ conservative */ - LCONSOLE_ERROR_MSG(0x112, "Can't parse networks: string too " - "long\n"); - return -EINVAL; - } - - LIBCFS_ALLOC(tokens, tokensize); - if (tokens == NULL) { - CERROR("Can't allocate net tokens\n"); - return -ENOMEM; - } - - the_lnet.ln_network_tokens = tokens; - the_lnet.ln_network_tokens_nob = tokensize; - memcpy (tokens, networks, tokensize); - str = tokens; - - /* Add in the loopback network */ - ni = lnet_new_ni(LNET_MKNET(LOLND, 0), nilist); - if (ni == NULL) - goto failed; - - while (str != NULL && *str != 0) { - char *comma = strchr(str, ','); - char *bracket = strchr(str, '('); - int niface; - char *iface; - - /* NB we don't check interface conflicts here; it's the LNDs - * responsibility (if it cares at all) */ - - if (bracket == NULL || - (comma != NULL && comma < bracket)) { - - /* no interface list specified */ - - if (comma != NULL) - *comma++ = 0; - net = libcfs_str2net(lnet_trimwhite(str)); - - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - LCONSOLE_ERROR_MSG(0x113, "Unrecognised network" - " type\n"); - goto failed; - } - - if (LNET_NETTYP(net) != LOLND && /* loopback is implicit */ - lnet_new_ni(net, nilist) == NULL) - goto failed; - - str = comma; - continue; - } - - *bracket = 0; - net = libcfs_str2net(lnet_trimwhite(str)); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - goto failed; - } - - if (nnets > 0 && - the_lnet.ln_ptlcompat > 0) { - 
LCONSOLE_ERROR_MSG(0x114, "Only 1 network supported when" - " 'portals_compatible' is set\n"); - goto failed; - } - - nnets++; - ni = lnet_new_ni(net, nilist); - if (ni == NULL) - goto failed; - - niface = 0; - iface = bracket + 1; - - bracket = strchr(iface, ')'); - if (bracket == NULL) { - lnet_syntax("networks", networks, - iface - tokens, strlen(iface)); - goto failed; - } - - *bracket = 0; - do { - comma = strchr(iface, ','); - if (comma != NULL) - *comma++ = 0; - - iface = lnet_trimwhite(iface); - if (*iface == 0) { - lnet_syntax("networks", networks, - iface - tokens, strlen(iface)); - goto failed; - } - - if (niface == LNET_MAX_INTERFACES) { - LCONSOLE_ERROR_MSG(0x115, "Too many interfaces " - "for net %s\n", - libcfs_net2str(net)); - goto failed; - } - - ni->ni_interfaces[niface++] = iface; - iface = comma; - } while (iface != NULL); - - str = bracket + 1; - comma = strchr(bracket + 1, ','); - if (comma != NULL) { - *comma = 0; - str = lnet_trimwhite(str); - if (*str != 0) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - goto failed; - } - str = comma + 1; - continue; - } - - str = lnet_trimwhite(str); - if (*str != 0) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - goto failed; - } - } - - LASSERT (!list_empty(nilist)); - return 0; - - failed: - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, lnet_ni_t, ni_list); - - list_del(&ni->ni_list); - LIBCFS_FREE(ni, sizeof(*ni)); - } - LIBCFS_FREE(tokens, tokensize); - the_lnet.ln_network_tokens = NULL; - - return -EINVAL; -} - -lnet_text_buf_t * -lnet_new_text_buf (int str_len) -{ - lnet_text_buf_t *ltb; - int nob; - - /* NB allocate space for the terminating 0 */ - nob = offsetof(lnet_text_buf_t, ltb_text[str_len + 1]); - if (nob > LNET_SINGLE_TEXTBUF_NOB) { - /* _way_ conservative for "route net gateway..." 
*/ - CERROR("text buffer too big\n"); - return NULL; - } - - if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) { - CERROR("Too many text buffers\n"); - return NULL; - } - - LIBCFS_ALLOC(ltb, nob); - if (ltb == NULL) - return NULL; - - ltb->ltb_size = nob; - ltb->ltb_text[0] = 0; - lnet_tbnob += nob; - return ltb; -} - -void -lnet_free_text_buf (lnet_text_buf_t *ltb) -{ - lnet_tbnob -= ltb->ltb_size; - LIBCFS_FREE(ltb, ltb->ltb_size); -} - -void -lnet_free_text_bufs(struct list_head *tbs) -{ - lnet_text_buf_t *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list); - - list_del(<b->ltb_list); - lnet_free_text_buf(ltb); - } -} - -void -lnet_print_text_bufs(struct list_head *tbs) -{ - struct list_head *tmp; - lnet_text_buf_t *ltb; - - list_for_each (tmp, tbs) { - ltb = list_entry(tmp, lnet_text_buf_t, ltb_list); - - CDEBUG(D_WARNING, "%s\n", ltb->ltb_text); - } - - CDEBUG(D_WARNING, "%d allocated\n", lnet_tbnob); -} - -int -lnet_str2tbs_sep (struct list_head *tbs, char *str) -{ - struct list_head pending; - char *sep; - int nob; - int i; - lnet_text_buf_t *ltb; - - CFS_INIT_LIST_HEAD(&pending); - - /* Split 'str' into separate commands */ - for (;;) { - /* skip leading whitespace */ - while (lnet_iswhite(*str)) - str++; - - /* scan for separator or comment */ - for (sep = str; *sep != 0; sep++) - if (lnet_issep(*sep) || *sep == '#') - break; - - nob = sep - str; - if (nob > 0) { - ltb = lnet_new_text_buf(nob); - if (ltb == NULL) { - lnet_free_text_bufs(&pending); - return -1; - } - - for (i = 0; i < nob; i++) - if (lnet_iswhite(str[i])) - ltb->ltb_text[i] = ' '; - else - ltb->ltb_text[i] = str[i]; - - ltb->ltb_text[nob] = 0; - - list_add_tail(<b->ltb_list, &pending); - } - - if (*sep == '#') { - /* scan for separator */ - do { - sep++; - } while (*sep != 0 && !lnet_issep(*sep)); - } - - if (*sep == 0) - break; - - str = sep + 1; - } - - list_splice(&pending, tbs->prev); - return 0; -} - -int -lnet_expand1tb (struct list_head *list, - 
char *str, char *sep1, char *sep2, - char *item, int itemlen) -{ - int len1 = sep1 - str; - int len2 = strlen(sep2 + 1); - lnet_text_buf_t *ltb; - - LASSERT (*sep1 == '['); - LASSERT (*sep2 == ']'); - - ltb = lnet_new_text_buf(len1 + itemlen + len2); - if (ltb == NULL) - return -ENOMEM; - - memcpy(ltb->ltb_text, str, len1); - memcpy(<b->ltb_text[len1], item, itemlen); - memcpy(<b->ltb_text[len1+itemlen], sep2 + 1, len2); - ltb->ltb_text[len1 + itemlen + len2] = 0; - - list_add_tail(<b->ltb_list, list); - return 0; -} - -int -lnet_str2tbs_expand (struct list_head *tbs, char *str) -{ - char num[16]; - struct list_head pending; - char *sep; - char *sep2; - char *parsed; - char *enditem; - int lo; - int hi; - int stride; - int i; - int nob; - int scanned; - - CFS_INIT_LIST_HEAD(&pending); - - sep = strchr(str, '['); - if (sep == NULL) /* nothing to expand */ - return 0; - - sep2 = strchr(sep, ']'); - if (sep2 == NULL) - goto failed; - - for (parsed = sep; parsed < sep2; parsed = enditem) { - - enditem = ++parsed; - while (enditem < sep2 && *enditem != ',') - enditem++; - - if (enditem == parsed) /* no empty items */ - goto failed; - - if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, &stride, &scanned) < 3) { - - if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) { - - /* simple string enumeration */ - if (lnet_expand1tb(&pending, str, sep, sep2, - parsed, enditem - parsed) != 0) - goto failed; - - continue; - } - - stride = 1; - } - - /* range expansion */ - - if (enditem != parsed + scanned) /* no trailing junk */ - goto failed; - - if (hi < 0 || lo < 0 || stride < 0 || hi < lo || - (hi - lo) % stride != 0) - goto failed; - - for (i = lo; i <= hi; i += stride) { - - snprintf(num, sizeof(num), "%d", i); - nob = strlen(num); - if (nob + 1 == sizeof(num)) - goto failed; - - if (lnet_expand1tb(&pending, str, sep, sep2, - num, nob) != 0) - goto failed; - } - } - - list_splice(&pending, tbs->prev); - return 1; - - failed: - lnet_free_text_bufs(&pending); - return -1; -} - 
-int -lnet_parse_hops (char *str, unsigned int *hops) -{ - int len = strlen(str); - int nob = len; - - return (sscanf(str, "%u%n", hops, &nob) >= 1 && - nob == len && - *hops > 0 && *hops < 256); -} - - -int -lnet_parse_route (char *str, int *im_a_router) -{ - /* static scratch buffer OK (single threaded) */ - static char cmd[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head nets; - struct list_head gateways; - struct list_head *tmp1; - struct list_head *tmp2; - __u32 net; - lnet_nid_t nid; - lnet_text_buf_t *ltb; - int rc; - char *sep; - char *token = str; - int ntokens = 0; - int myrc = -1; - unsigned int hops; - int got_hops = 0; - - CFS_INIT_LIST_HEAD(&gateways); - CFS_INIT_LIST_HEAD(&nets); - - /* save a copy of the string for error messages */ - strncpy(cmd, str, sizeof(cmd) - 1); - cmd[sizeof(cmd) - 1] = 0; - - sep = str; - for (;;) { - /* scan for token start */ - while (lnet_iswhite(*sep)) - sep++; - if (*sep == 0) { - if (ntokens < (got_hops ? 3 : 2)) - goto token_error; - break; - } - - ntokens++; - token = sep++; - - /* scan for token end */ - while (*sep != 0 && !lnet_iswhite(*sep)) - sep++; - if (*sep != 0) - *sep++ = 0; - - if (ntokens == 1) { - tmp2 = &nets; /* expanding nets */ - } else if (ntokens == 2 && - lnet_parse_hops(token, &hops)) { - got_hops = 1; /* got a hop count */ - continue; - } else { - tmp2 = &gateways; /* expanding gateways */ - } - - ltb = lnet_new_text_buf(strlen(token)); - if (ltb == NULL) - goto out; - - strcpy(ltb->ltb_text, token); - tmp1 = <b->ltb_list; - list_add_tail(tmp1, tmp2); - - while (tmp1 != tmp2) { - ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list); - - rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text); - if (rc < 0) - goto token_error; - - tmp1 = tmp1->next; - - if (rc > 0) { /* expanded! 
*/ - list_del(<b->ltb_list); - lnet_free_text_buf(ltb); - continue; - } - - if (ntokens == 1) { - net = libcfs_str2net(ltb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND) - goto token_error; - } else { - nid = libcfs_str2nid(ltb->ltb_text); - if (nid == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) - goto token_error; - } - } - } - - if (!got_hops) - hops = 1; - - LASSERT (!list_empty(&nets)); - LASSERT (!list_empty(&gateways)); - - list_for_each (tmp1, &nets) { - ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list); - net = libcfs_str2net(ltb->ltb_text); - LASSERT (net != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each (tmp2, &gateways) { - ltb = list_entry(tmp2, lnet_text_buf_t, ltb_list); - nid = libcfs_str2nid(ltb->ltb_text); - LASSERT (nid != LNET_NID_ANY); - - if (lnet_islocalnid(nid)) { - *im_a_router = 1; - continue; - } - - rc = lnet_add_route (net, hops, nid); - if (rc != 0) { - CERROR("Can't create route " - "to %s via %s\n", - libcfs_net2str(net), - libcfs_nid2str(nid)); - goto out; - } - } - } - - myrc = 0; - goto out; - - token_error: - lnet_syntax("routes", cmd, token - str, strlen(token)); - out: - lnet_free_text_bufs(&nets); - lnet_free_text_bufs(&gateways); - return myrc; -} - -int -lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router) -{ - lnet_text_buf_t *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list); - - if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) { - lnet_free_text_bufs(tbs); - return -EINVAL; - } - - list_del(<b->ltb_list); - lnet_free_text_buf(ltb); - } - - return 0; -} - -int -lnet_parse_routes (char *routes, int *im_a_router) -{ - struct list_head tbs; - int rc = 0; - - *im_a_router = 0; - - if (the_lnet.ln_ptlcompat > 0 && - routes[0] != 0) { - /* Can't route when running in compatibility mode */ - LCONSOLE_ERROR_MSG(0x116, "Route tables are not supported when " - "'portals_compatible' is set\n"); - return -EINVAL; - } - - 
CFS_INIT_LIST_HEAD(&tbs); - - if (lnet_str2tbs_sep(&tbs, routes) < 0) { - CERROR("Error parsing routes\n"); - rc = -EINVAL; - } else { - rc = lnet_parse_route_tbs(&tbs, im_a_router); - } - - LASSERT (lnet_tbnob == 0); - return rc; -} - -void -lnet_print_range_exprs(struct list_head *exprs) -{ - struct list_head *e; - lnet_range_expr_t *lre; - - list_for_each(e, exprs) { - lre = list_entry(exprs->next, lnet_range_expr_t, lre_list); - - CDEBUG(D_WARNING, "%d-%d/%d\n", - lre->lre_min, lre->lre_max, lre->lre_stride); - } - - CDEBUG(D_WARNING, "%d allocated\n", lnet_re_alloc); -} - -int -lnet_new_range_expr(struct list_head *exprs, int min, int max, int stride) -{ - lnet_range_expr_t *lre; - - CDEBUG(D_NET, "%d-%d/%d\n", min, max, stride); - - if (min < 0 || min > 255 || min > max || stride < 0) - return -EINVAL; - - LIBCFS_ALLOC(lre, sizeof(*lre)); - if (lre == NULL) - return -ENOMEM; - - lnet_re_alloc++; - - lre->lre_min = min; - lre->lre_max = max; - lre->lre_stride = stride; - - list_add(&lre->lre_list, exprs); - return 0; -} - -void -lnet_destroy_range_exprs(struct list_head *exprs) -{ - lnet_range_expr_t *lre; - - while (!list_empty(exprs)) { - lre = list_entry(exprs->next, lnet_range_expr_t, lre_list); - - list_del(&lre->lre_list); - LIBCFS_FREE(lre, sizeof(*lre)); - lnet_re_alloc--; - } -} - -int -lnet_parse_range_expr(struct list_head *exprs, char *str) -{ - int nob = strlen(str); - char *sep; - int n; - int x; - int y; - int z; - int rc; - - if (nob == 0) - return -EINVAL; - - if (!strcmp(str, "*")) /* match all */ - return lnet_new_range_expr(exprs, 0, 255, 1); - - n = nob; - if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) { - /* simple number */ - return lnet_new_range_expr(exprs, x, x, 1); - } - - /* Has to be an expansion */ - if (!(str[0] == '[' && nob > 2 && str[nob-1] == ']')) - return -EINVAL; - - nob -= 2; - str++; - str[nob] = 0; - - do { - /* Comma separated list of expressions... 
*/ - sep = strchr(str, ','); - if (sep != NULL) - *sep++ = 0; - - nob = strlen(str); - n = nob; - if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) { - /* simple number */ - rc = lnet_new_range_expr(exprs, x, x, 1); - if (rc != 0) - return rc; - - continue; - } - - n = nob; - if (sscanf(str, "%u-%u%n", &x, &y, &n) >= 2 && n == nob) { - /* simple range */ - rc = lnet_new_range_expr(exprs, x, y, 1); - if (rc != 0) - return rc; - continue; - } - - n = nob; - if (sscanf(str, "%u-%u/%u%n", &x, &y, &z, &n) >= 3 && n == nob) { - /* strided range */ - rc = lnet_new_range_expr(exprs, x, y, z); - if (rc != 0) - return rc; - continue; - } - - return -EINVAL; - - } while ((str = sep) != NULL); - - return 0; -} - -int -lnet_match_network_token(char *token, __u32 *ipaddrs, int nip) -{ - struct list_head exprs[4]; - struct list_head *e; - lnet_range_expr_t *re; - char *str; - int i; - int j; - __u32 ip; - int n; - int match; - int rc; - - for (i = 0; i < 4; i++) - CFS_INIT_LIST_HEAD(&exprs[i]); - - for (i = 0; i < 4; i++) { - str = token; - if (i != 3) { - token = strchr(token, '.'); - if (token == NULL) { - rc = -EINVAL; - goto out; - } - *token++ = 0; - } - - rc = lnet_parse_range_expr(&exprs[i], str); - if (rc != 0) { - LASSERT (rc < 0); - goto out; - } - } - - for (match = i = 0; !match && i < nip; i++) { - ip = ipaddrs[i]; - - for (match = 1, j = 0; match && j < 4; j++) { - n = (ip >> (8 * (3 - j))) & 0xff; - match = 0; - - list_for_each(e, &exprs[j]) { - re = list_entry(e, lnet_range_expr_t, lre_list); - - if (re->lre_min <= n && - re->lre_max >= n && - (n - re->lre_min) % re->lre_stride == 0) { - match = 1; - break; - } - } - } - } - - rc = match ? 
1 : 0; - - out: - for (i = 0; i < 4; i++) - lnet_destroy_range_exprs(&exprs[i]); - LASSERT (lnet_re_alloc == 0); - - return rc; -} - -int -lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip) -{ - static char tokens[LNET_SINGLE_TEXTBUF_NOB]; - - int matched = 0; - int ntokens = 0; - int len; - char *net = NULL; - char *sep; - char *token; - int rc; - - LASSERT (strlen(net_entry) < sizeof(tokens)); - - /* work on a copy of the string */ - strcpy(tokens, net_entry); - sep = tokens; - for (;;) { - /* scan for token start */ - while (lnet_iswhite(*sep)) - sep++; - if (*sep == 0) - break; - - token = sep++; - - /* scan for token end */ - while (*sep != 0 && !lnet_iswhite(*sep)) - sep++; - if (*sep != 0) - *sep++ = 0; - - if (ntokens++ == 0) { - net = token; - continue; - } - - len = strlen(token); - - rc = lnet_match_network_token(token, ipaddrs, nip); - if (rc < 0) { - lnet_syntax("ip2nets", net_entry, - token - tokens, len); - return rc; - } - - matched |= (rc != 0); - } - - if (!matched) - return 0; - - strcpy(net_entry, net); /* replace with matched net */ - return 1; -} - -__u32 -lnet_netspec2net(char *netspec) -{ - char *bracket = strchr(netspec, '('); - __u32 net; - - if (bracket != NULL) - *bracket = 0; - - net = libcfs_str2net(netspec); - - if (bracket != NULL) - *bracket = '('; - - return net; -} - -int -lnet_splitnets(char *source, struct list_head *nets) -{ - int offset = 0; - int offset2; - int len; - lnet_text_buf_t *tb; - lnet_text_buf_t *tb2; - struct list_head *t; - char *sep; - char *bracket; - __u32 net; - - LASSERT (!list_empty(nets)); - LASSERT (nets->next == nets->prev); /* single entry */ - - tb = list_entry(nets->next, lnet_text_buf_t, ltb_list); - - for (;;) { - sep = strchr(tb->ltb_text, ','); - bracket = strchr(tb->ltb_text, '('); - - if (sep != NULL && - bracket != NULL && - bracket < sep) { - /* netspec lists interfaces... 
*/ - - offset2 = offset + (bracket - tb->ltb_text); - len = strlen(bracket); - - bracket = strchr(bracket + 1, ')'); - - if (bracket == NULL || - !(bracket[1] == ',' || bracket[1] == 0)) { - lnet_syntax("ip2nets", source, offset2, len); - return -EINVAL; - } - - sep = (bracket[1] == 0) ? NULL : bracket + 1; - } - - if (sep != NULL) - *sep++ = 0; - - net = lnet_netspec2net(tb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - - list_for_each(t, nets) { - tb2 = list_entry(t, lnet_text_buf_t, ltb_list); - - if (tb2 == tb) - continue; - - if (net == lnet_netspec2net(tb2->ltb_text)) { - /* duplicate network */ - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - } - - if (sep == NULL) - return 0; - - offset += sep - tb->ltb_text; - tb2 = lnet_new_text_buf(strlen(sep)); - if (tb2 == NULL) - return -ENOMEM; - - strcpy(tb2->ltb_text, sep); - list_add_tail(&tb2->ltb_list, nets); - - tb = tb2; - } -} - -int -lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) -{ - static char networks[LNET_SINGLE_TEXTBUF_NOB]; - static char source[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head raw_entries; - struct list_head matched_nets; - struct list_head current_nets; - struct list_head *t; - struct list_head *t2; - lnet_text_buf_t *tb; - lnet_text_buf_t *tb2; - __u32 net1; - __u32 net2; - int len; - int count; - int dup; - int rc; - - CFS_INIT_LIST_HEAD(&raw_entries); - if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) { - CERROR("Error parsing ip2nets\n"); - LASSERT (lnet_tbnob == 0); - return -EINVAL; - } - - CFS_INIT_LIST_HEAD(&matched_nets); - CFS_INIT_LIST_HEAD(¤t_nets); - networks[0] = 0; - count = 0; - len = 0; - rc = 0; - - while (!list_empty(&raw_entries)) { - tb = list_entry(raw_entries.next, lnet_text_buf_t, ltb_list); - - strncpy(source, tb->ltb_text, sizeof(source)-1); - source[sizeof(source)-1] = 0; - - /* replace 
ltb_text with the network(s) add on match */ - rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip); - if (rc < 0) - break; - - list_del(&tb->ltb_list); - - if (rc == 0) { /* no match */ - lnet_free_text_buf(tb); - continue; - } - - /* split into separate networks */ - CFS_INIT_LIST_HEAD(¤t_nets); - list_add(&tb->ltb_list, ¤t_nets); - rc = lnet_splitnets(source, ¤t_nets); - if (rc < 0) - break; - - dup = 0; - list_for_each (t, ¤t_nets) { - tb = list_entry(t, lnet_text_buf_t, ltb_list); - net1 = lnet_netspec2net(tb->ltb_text); - LASSERT (net1 != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each(t2, &matched_nets) { - tb2 = list_entry(t2, lnet_text_buf_t, ltb_list); - net2 = lnet_netspec2net(tb2->ltb_text); - LASSERT (net2 != LNET_NIDNET(LNET_NID_ANY)); - - if (net1 == net2) { - dup = 1; - break; - } - } - - if (dup) - break; - } - - if (dup) { - lnet_free_text_bufs(¤t_nets); - continue; - } - - list_for_each_safe(t, t2, ¤t_nets) { - tb = list_entry(t, lnet_text_buf_t, ltb_list); - - list_del(&tb->ltb_list); - list_add_tail(&tb->ltb_list, &matched_nets); - - len += snprintf(networks + len, sizeof(networks) - len, - "%s%s", (len == 0) ? 
"" : ",", - tb->ltb_text); - - if (len >= sizeof(networks)) { - CERROR("Too many matched networks\n"); - rc = -E2BIG; - goto out; - } - } - - count++; - } - - out: - lnet_free_text_bufs(&raw_entries); - lnet_free_text_bufs(&matched_nets); - lnet_free_text_bufs(¤t_nets); - LASSERT (lnet_tbnob == 0); - - if (rc < 0) - return rc; - - *networksp = networks; - return count; -} - -#ifdef __KERNEL__ -void -lnet_ipaddr_free_enumeration(__u32 *ipaddrs, int nip) -{ - LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs)); -} - -int -lnet_ipaddr_enumerate (__u32 **ipaddrsp) -{ - int up; - __u32 netmask; - __u32 *ipaddrs; - __u32 *ipaddrs2; - int nip; - char **ifnames; - int nif = libcfs_ipif_enumerate(&ifnames); - int i; - int rc; - - if (nif <= 0) - return nif; - - LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs)); - if (ipaddrs == NULL) { - CERROR("Can't allocate ipaddrs[%d]\n", nif); - libcfs_ipif_free_enumeration(ifnames, nif); - return -ENOMEM; - } - - for (i = nip = 0; i < nif; i++) { - if (!strcmp(ifnames[i], "lo")) - continue; - - rc = libcfs_ipif_query(ifnames[i], &up, - &ipaddrs[nip], &netmask); - if (rc != 0) { - CWARN("Can't query interface %s: %d\n", - ifnames[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s: it's down\n", - ifnames[i]); - continue; - } - - nip++; - } - - libcfs_ipif_free_enumeration(ifnames, nif); - - if (nip == nif) { - *ipaddrsp = ipaddrs; - } else { - if (nip > 0) { - LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2)); - if (ipaddrs2 == NULL) { - CERROR("Can't allocate ipaddrs[%d]\n", nip); - nip = -ENOMEM; - } else { - memcpy(ipaddrs2, ipaddrs, - nip * sizeof(*ipaddrs)); - *ipaddrsp = ipaddrs2; - rc = nip; - } - } - lnet_ipaddr_free_enumeration(ipaddrs, nif); - } - return nip; -} - -int -lnet_parse_ip2nets (char **networksp, char *ip2nets) -{ - __u32 *ipaddrs; - int nip = lnet_ipaddr_enumerate(&ipaddrs); - int rc; - - if (nip < 0) { - LCONSOLE_ERROR_MSG(0x117, "Error %d enumerating local IP " - "interfaces for ip2nets to match\n", nip); 
- return nip; - } - - if (nip == 0) { - LCONSOLE_ERROR_MSG(0x118, "No local IP interfaces " - "for ip2nets to match\n"); - return -ENOENT; - } - - rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip); - lnet_ipaddr_free_enumeration(ipaddrs, nip); - - if (rc < 0) { - LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc); - return rc; - } - - if (rc == 0) { - LCONSOLE_ERROR_MSG(0x11a, "ip2nets does not match " - "any local IP interfaces\n"); - return -ENOENT; - } - - return 0; -} - -int -lnet_set_ip_niaddr (lnet_ni_t *ni) -{ - __u32 net = LNET_NIDNET(ni->ni_nid); - char **names; - int n; - __u32 ip; - __u32 netmask; - int up; - int i; - int rc; - - /* Convenience for LNDs that use the IP address of a local interface as - * the local address part of their NID */ - - if (ni->ni_interfaces[0] != NULL) { - - CLASSERT (LNET_MAX_INTERFACES > 1); - - if (ni->ni_interfaces[1] != NULL) { - CERROR("Net %s doesn't support multiple interfaces\n", - libcfs_net2str(net)); - return -EPERM; - } - - rc = libcfs_ipif_query(ni->ni_interfaces[0], - &up, &ip, &netmask); - if (rc != 0) { - CERROR("Net %s can't query interface %s: %d\n", - libcfs_net2str(net), ni->ni_interfaces[0], rc); - return -EPERM; - } - - if (!up) { - CERROR("Net %s can't use interface %s: it's down\n", - libcfs_net2str(net), ni->ni_interfaces[0]); - return -ENETDOWN; - } - - ni->ni_nid = LNET_MKNID(net, ip); - return 0; - } - - n = libcfs_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Net %s can't enumerate interfaces: %d\n", - libcfs_net2str(net), n); - return 0; - } - - for (i = 0; i < n; i++) { - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = libcfs_ipif_query(names[i], &up, &ip, &netmask); - - if (rc != 0) { - CWARN("Net %s can't query interface %s: %d\n", - libcfs_net2str(net), names[i], rc); - continue; - } - - if (!up) { - CWARN("Net %s ignoring interface %s (down)\n", - libcfs_net2str(net), names[i]); - continue; - } - - libcfs_ipif_free_enumeration(names, n); - 
ni->ni_nid = LNET_MKNID(net, ip); - return 0; - } - - CERROR("Net %s can't find any interfaces\n", libcfs_net2str(net)); - libcfs_ipif_free_enumeration(names, n); - return -ENOENT; -} -EXPORT_SYMBOL(lnet_set_ip_niaddr); - -#endif diff --git a/lnet/lnet/lib-eq.c b/lnet/lnet/lib-eq.c deleted file mode 100644 index 35801a4cd76f61c8f19a8ddf676dc413725eac62..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-eq.c +++ /dev/null @@ -1,318 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-eq.c - * Library level Event queue management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -int -LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, - lnet_handle_eq_t *handle) -{ - lnet_eq_t *eq; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - /* We need count to be a power of 2 so that when eq_{enq,deq}_seq - * overflow, they don't skip entries, so the queue has the same - * apparant capacity at all times */ - - if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */ - do { /* knock off all but the top bit... 
*/ - count &= ~LOWEST_BIT_SET (count); - } while (count != LOWEST_BIT_SET(count)); - - count <<= 1; /* ...and round up */ - } - - if (count == 0) /* catch bad parameter / overflow on roundup */ - return (-EINVAL); - - eq = lnet_eq_alloc(); - if (eq == NULL) - return (-ENOMEM); - - LIBCFS_ALLOC(eq->eq_events, count * sizeof(lnet_event_t)); - if (eq->eq_events == NULL) { - LNET_LOCK(); - lnet_eq_free (eq); - LNET_UNLOCK(); - - return -ENOMEM; - } - - /* NB this resets all event sequence numbers to 0, to be earlier - * than eq_deq_seq */ - memset(eq->eq_events, 0, count * sizeof(lnet_event_t)); - - eq->eq_deq_seq = 1; - eq->eq_enq_seq = 1; - eq->eq_size = count; - eq->eq_refcount = 0; - eq->eq_callback = callback; - - LNET_LOCK(); - - lnet_initialise_handle (&eq->eq_lh, LNET_COOKIE_TYPE_EQ); - list_add (&eq->eq_list, &the_lnet.ln_active_eqs); - - LNET_UNLOCK(); - - lnet_eq2handle(handle, eq); - return (0); -} - -int -LNetEQFree(lnet_handle_eq_t eqh) -{ - lnet_eq_t *eq; - int size; - lnet_event_t *events; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - eq = lnet_handle2eq(&eqh); - if (eq == NULL) { - LNET_UNLOCK(); - return (-ENOENT); - } - - if (eq->eq_refcount != 0) { - LNET_UNLOCK(); - return (-EBUSY); - } - - /* stash for free after lock dropped */ - events = eq->eq_events; - size = eq->eq_size; - - lnet_invalidate_handle (&eq->eq_lh); - list_del (&eq->eq_list); - lnet_eq_free (eq); - - LNET_UNLOCK(); - - LIBCFS_FREE(events, size * sizeof (lnet_event_t)); - - return 0; -} - -int -lib_get_event (lnet_eq_t *eq, lnet_event_t *ev) -{ - int new_index = eq->eq_deq_seq & (eq->eq_size - 1); - lnet_event_t *new_event = &eq->eq_events[new_index]; - int rc; - ENTRY; - - CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n", - new_event, eq->eq_deq_seq, eq->eq_size); - - if (LNET_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) { - RETURN(0); - } - - /* We've got a new event... 
*/ - *ev = *new_event; - - /* ...but did it overwrite an event we've not seen yet? */ - if (eq->eq_deq_seq == new_event->sequence) { - rc = 1; - } else { - /* don't complain with CERROR: some EQs are sized small - * anyway; if it's important, the caller should complain */ - CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n", - eq->eq_deq_seq, new_event->sequence); - rc = -EOVERFLOW; - } - - eq->eq_deq_seq = new_event->sequence + 1; - RETURN(rc); -} - - -int -LNetEQGet (lnet_handle_eq_t eventq, lnet_event_t *event) -{ - int which; - - return LNetEQPoll(&eventq, 1, 0, - event, &which); -} - -int -LNetEQWait (lnet_handle_eq_t eventq, lnet_event_t *event) -{ - int which; - - return LNetEQPoll(&eventq, 1, LNET_TIME_FOREVER, - event, &which); -} - -int -LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms, - lnet_event_t *event, int *which) -{ - int i; - int rc; -#ifdef __KERNEL__ - cfs_waitlink_t wl; - cfs_time_t now; -#else - struct timeval then; - struct timeval now; -# ifdef HAVE_LIBPTHREAD - struct timespec ts; -# endif - lnet_ni_t *eqwaitni = the_lnet.ln_eqwaitni; -#endif - ENTRY; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (neq < 1) - RETURN(-ENOENT); - - LNET_LOCK(); - - for (;;) { - for (i = 0; i < neq; i++) { - lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]); - - if (eq == NULL) { - LNET_UNLOCK(); - RETURN(-ENOENT); - } - - rc = lib_get_event (eq, event); - if (rc != 0) { - LNET_UNLOCK(); - *which = i; - RETURN(rc); - } - } - -#ifdef __KERNEL__ - if (timeout_ms == 0) { - LNET_UNLOCK(); - RETURN (0); - } - - cfs_waitlink_init(&wl); - set_current_state(TASK_INTERRUPTIBLE); - cfs_waitq_add(&the_lnet.ln_waitq, &wl); - - LNET_UNLOCK(); - - if (timeout_ms < 0) { - cfs_waitq_wait (&wl, CFS_TASK_INTERRUPTIBLE); - } else { - struct timeval tv; - - now = cfs_time_current(); - cfs_waitq_timedwait(&wl, CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(timeout_ms)/1000); - cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), - 
&tv); - timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000; - if (timeout_ms < 0) - timeout_ms = 0; - } - - LNET_LOCK(); - cfs_waitq_del(&the_lnet.ln_waitq, &wl); -#else - if (eqwaitni != NULL) { - /* I have a single NI that I have to call into, to get - * events queued, or to block. */ - lnet_ni_addref_locked(eqwaitni); - LNET_UNLOCK(); - - if (timeout_ms <= 0) { - (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms); - } else { - gettimeofday(&then, NULL); - - (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms); - - gettimeofday(&now, NULL); - timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + - (now.tv_usec - then.tv_usec) / 1000; - if (timeout_ms < 0) - timeout_ms = 0; - } - - LNET_LOCK(); - lnet_ni_decref_locked(eqwaitni); - - /* don't call into eqwaitni again if timeout has - * expired */ - if (timeout_ms == 0) - eqwaitni = NULL; - - continue; /* go back and check for events */ - } - - if (timeout_ms == 0) { - LNET_UNLOCK(); - RETURN (0); - } - -# ifndef HAVE_LIBPTHREAD - /* If I'm single-threaded, LNET fails at startup if it can't - * set the_lnet.ln_eqwaitni correctly. 
*/ - LBUG(); -# else - if (timeout_ms < 0) { - pthread_cond_wait(&the_lnet.ln_cond, - &the_lnet.ln_lock); - } else { - gettimeofday(&then, NULL); - - ts.tv_sec = then.tv_sec + timeout_ms/1000; - ts.tv_nsec = then.tv_usec * 1000 + - (timeout_ms%1000) * 1000000; - if (ts.tv_nsec >= 1000000000) { - ts.tv_sec++; - ts.tv_nsec -= 1000000000; - } - - pthread_cond_timedwait(&the_lnet.ln_cond, - &the_lnet.ln_lock, &ts); - - gettimeofday(&now, NULL); - timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + - (now.tv_usec - then.tv_usec) / 1000; - - if (timeout_ms < 0) - timeout_ms = 0; - } -# endif -#endif - } -} diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c deleted file mode 100644 index 731db566bc3726191fd02c607d6fa2b9f995f95e..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-md.c +++ /dev/null @@ -1,311 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-md.c - * Memory Descriptor management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -/* must be called with LNET_LOCK held */ -void -lnet_md_unlink(lnet_libmd_t *md) -{ - if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) == 0) { - /* first unlink attempt... 
*/ - lnet_me_t *me = md->md_me; - - md->md_flags |= LNET_MD_FLAG_ZOMBIE; - - /* Disassociate from ME (if any), and unlink it if it was created - * with LNET_UNLINK */ - if (me != NULL) { - md->md_me = NULL; - me->me_md = NULL; - if (me->me_unlink == LNET_UNLINK) - lnet_me_unlink(me); - } - - /* ensure all future handle lookups fail */ - lnet_invalidate_handle(&md->md_lh); - } - - if (md->md_refcount != 0) { - CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - return; - } - - CDEBUG(D_NET, "Unlinking md %p\n", md); - - if (md->md_eq != NULL) { - md->md_eq->eq_refcount--; - LASSERT (md->md_eq->eq_refcount >= 0); - } - - list_del (&md->md_list); - lnet_md_free(md); -} - -/* must be called with LNET_LOCK held */ -static int -lib_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) -{ - lnet_eq_t *eq = NULL; - int i; - unsigned int niov; - int total_length = 0; - - /* NB we are passed an allocated, but uninitialised/active md. - * if we return success, caller may lnet_md_unlink() it. - * otherwise caller may only lnet_md_free() it. - */ - - if (!LNetHandleIsEqual (umd->eq_handle, LNET_EQ_NONE)) { - eq = lnet_handle2eq(&umd->eq_handle); - if (eq == NULL) - return -ENOENT; - } - - /* This implementation doesn't know how to create START events or - * disable END events. Best to LASSERT our caller is compliant so - * we find out quickly... */ - /* TODO - reevaluate what should be here in light of - * the removal of the start and end events - * maybe there we shouldn't even allow LNET_EQ_NONE!) - LASSERT (eq == NULL); - */ - - lmd->md_me = NULL; - lmd->md_start = umd->start; - lmd->md_offset = 0; - lmd->md_max_size = umd->max_size; - lmd->md_options = umd->options; - lmd->md_user_ptr = umd->user_ptr; - lmd->md_eq = eq; - lmd->md_threshold = umd->threshold; - lmd->md_refcount = 0; - lmd->md_flags = (unlink == LNET_UNLINK) ? 
LNET_MD_FLAG_AUTO_UNLINK : 0; - - if ((umd->options & LNET_MD_IOVEC) != 0) { - - if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */ - return -EINVAL; - - lmd->md_niov = niov = umd->length; - memcpy(lmd->md_iov.iov, umd->start, - niov * sizeof (lmd->md_iov.iov[0])); - - for (i = 0; i < niov; i++) { - /* We take the base address on trust */ - if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */ - return -EINVAL; - - total_length += lmd->md_iov.iov[i].iov_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) // illegal max_size - return -EINVAL; - - } else if ((umd->options & LNET_MD_KIOV) != 0) { -#ifndef __KERNEL__ - return -EINVAL; -#else - lmd->md_niov = niov = umd->length; - memcpy(lmd->md_iov.kiov, umd->start, - niov * sizeof (lmd->md_iov.kiov[0])); - - for (i = 0; i < niov; i++) { - /* We take the page pointer on trust */ - if (lmd->md_iov.kiov[i].kiov_offset + - lmd->md_iov.kiov[i].kiov_len > CFS_PAGE_SIZE ) - return -EINVAL; /* invalid length */ - - total_length += lmd->md_iov.kiov[i].kiov_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) // illegal max_size - return -EINVAL; -#endif - } else { /* contiguous */ - lmd->md_length = umd->length; - lmd->md_niov = niov = 1; - lmd->md_iov.iov[0].iov_base = umd->start; - lmd->md_iov.iov[0].iov_len = umd->length; - - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > umd->length)) // illegal max_size - return -EINVAL; - } - - if (eq != NULL) - eq->eq_refcount++; - - /* It's good; let handle2md succeed and add to active mds */ - lnet_initialise_handle (&lmd->md_lh, LNET_COOKIE_TYPE_MD); - list_add (&lmd->md_list, &the_lnet.ln_active_mds); - - return 0; -} - -/* must be called with LNET_LOCK held */ -void 
-lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd) -{ - /* NB this doesn't copy out all the iov entries so when a - * discontiguous MD is copied out, the target gets to know the - * original iov pointer (in start) and the number of entries it had - * and that's all. - */ - umd->start = lmd->md_start; - umd->length = ((lmd->md_options & (LNET_MD_IOVEC | LNET_MD_KIOV)) == 0) ? - lmd->md_length : lmd->md_niov; - umd->threshold = lmd->md_threshold; - umd->max_size = lmd->md_max_size; - umd->options = lmd->md_options; - umd->user_ptr = lmd->md_user_ptr; - lnet_eq2handle(&umd->eq_handle, lmd->md_eq); -} - -int -LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, - lnet_unlink_t unlink, lnet_handle_md_t *handle) -{ - lnet_me_t *me; - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 && - umd.length > LNET_MAX_IOV) /* too many fragments */ - return -EINVAL; - - md = lnet_md_alloc(&umd); - if (md == NULL) - return -ENOMEM; - - LNET_LOCK(); - - me = lnet_handle2me(&meh); - if (me == NULL) { - rc = -ENOENT; - } else if (me->me_md != NULL) { - rc = -EBUSY; - } else { - rc = lib_md_build(md, &umd, unlink); - if (rc == 0) { - me->me_md = md; - md->md_me = me; - - lnet_md2handle(handle, md); - - /* check if this MD matches any blocked msgs */ - lnet_match_blocked_msg(md); /* expects LNET_LOCK held */ - - LNET_UNLOCK(); - return (0); - } - } - - lnet_md_free (md); - - LNET_UNLOCK(); - return (rc); -} - -int -LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) -{ - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 && - umd.length > LNET_MAX_IOV) /* too many fragments */ - return -EINVAL; - - md = lnet_md_alloc(&umd); - if (md == NULL) - return -ENOMEM; - - LNET_LOCK(); - - rc = lib_md_build(md, &umd, unlink); - - if (rc == 0) { - 
lnet_md2handle(handle, md); - - LNET_UNLOCK(); - return (0); - } - - lnet_md_free (md); - - LNET_UNLOCK(); - return (rc); -} - -int -LNetMDUnlink (lnet_handle_md_t mdh) -{ - lnet_event_t ev; - lnet_libmd_t *md; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - md = lnet_handle2md(&mdh); - if (md == NULL) { - LNET_UNLOCK(); - return -ENOENT; - } - - /* If the MD is busy, lnet_md_unlink just marks it for deletion, and - * when the NAL is done, the completion event flags that the MD was - * unlinked. Otherwise, we enqueue an event now... */ - - if (md->md_eq != NULL && - md->md_refcount == 0) { - lnet_build_unlink_event(md, &ev); - lnet_enq_event_locked(md->md_eq, &ev); - } - - lnet_md_unlink(md); - - LNET_UNLOCK(); - return 0; -} - diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c deleted file mode 100644 index c5b12866c935db6c3672627f6e4f69851440f814..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-me.c +++ /dev/null @@ -1,182 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-me.c - * Match Entry management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -int -LNetMEAttach(unsigned int portal, - lnet_process_id_t match_id, - __u64 match_bits, __u64 ignore_bits, - lnet_unlink_t unlink, lnet_ins_pos_t pos, - lnet_handle_me_t *handle) -{ - lnet_me_t *me; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (portal >= the_lnet.ln_nportals) - return -EINVAL; - - me = lnet_me_alloc(); - if (me == NULL) - return -ENOMEM; - - LNET_LOCK(); - - me->me_portal = portal; - me->me_match_id = match_id; - me->me_match_bits = match_bits; - me->me_ignore_bits = ignore_bits; - me->me_unlink = unlink; - me->me_md = NULL; - - lnet_initialise_handle (&me->me_lh, LNET_COOKIE_TYPE_ME); - - if (pos == LNET_INS_AFTER) - list_add_tail(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml)); - else - list_add(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml)); - - lnet_me2handle(handle, me); - - LNET_UNLOCK(); - - return 0; -} - -int -LNetMEInsert(lnet_handle_me_t current_meh, - lnet_process_id_t match_id, - __u64 match_bits, __u64 ignore_bits, - lnet_unlink_t unlink, lnet_ins_pos_t pos, - lnet_handle_me_t *handle) -{ - lnet_me_t *current_me; - lnet_me_t *new_me; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - new_me = lnet_me_alloc(); - if (new_me == NULL) - return -ENOMEM; - - LNET_LOCK(); - - current_me = lnet_handle2me(¤t_meh); - if (current_me == NULL) { - lnet_me_free (new_me); - - LNET_UNLOCK(); - return -ENOENT; - } - - new_me->me_portal = current_me->me_portal; - new_me->me_match_id = match_id; - new_me->me_match_bits = match_bits; - new_me->me_ignore_bits = ignore_bits; - new_me->me_unlink = unlink; - new_me->me_md = NULL; - - lnet_initialise_handle (&new_me->me_lh, LNET_COOKIE_TYPE_ME); - - if (pos == LNET_INS_AFTER) - list_add_tail(&new_me->me_list, ¤t_me->me_list); - else - list_add(&new_me->me_list, ¤t_me->me_list); - - lnet_me2handle(handle, new_me); - - LNET_UNLOCK(); - - return 0; -} - -int 
-LNetMEUnlink(lnet_handle_me_t meh) -{ - lnet_me_t *me; - lnet_libmd_t *md; - lnet_event_t ev; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - me = lnet_handle2me(&meh); - if (me == NULL) { - LNET_UNLOCK(); - return -ENOENT; - } - - md = me->me_md; - if (md != NULL && - md->md_eq != NULL && - md->md_refcount == 0) { - lnet_build_unlink_event(md, &ev); - lnet_enq_event_locked(md->md_eq, &ev); - } - - lnet_me_unlink(me); - - LNET_UNLOCK(); - return 0; -} - -/* call with LNET_LOCK please */ -void -lnet_me_unlink(lnet_me_t *me) -{ - list_del (&me->me_list); - - if (me->me_md != NULL) { - me->me_md->md_me = NULL; - lnet_md_unlink(me->me_md); - } - - lnet_invalidate_handle (&me->me_lh); - lnet_me_free(me); -} - -#if 0 -static void -lib_me_dump(lnet_me_t *me) -{ - CWARN("Match Entry %p ("LPX64")\n", me, - me->me_lh.lh_cookie); - - CWARN("\tMatch/Ignore\t= %016lx / %016lx\n", - me->me_match_bits, me->me_ignore_bits); - - CWARN("\tMD\t= %p\n", me->md); - CWARN("\tprev\t= %p\n", - list_entry(me->me_list.prev, lnet_me_t, me_list)); - CWARN("\tnext\t= %p\n", - list_entry(me->me_list.next, lnet_me_t, me_list)); -} -#endif diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c deleted file mode 100644 index a4a1a4c9e09581d0bacb86882e276548f3ecdca6..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-move.c +++ /dev/null @@ -1,2577 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-move.c - * Data movement routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -static int local_nid_dist_zero = 1; -CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444, - "Reserved"); - -/* forward ref */ -static void lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg); -static void lnet_drop_delayed_put(lnet_msg_t *msg, char *reason); - -#define LNET_MATCHMD_NONE 0 /* Didn't match */ -#define LNET_MATCHMD_OK 1 /* Matched OK */ -#define LNET_MATCHMD_DROP 2 /* Must be discarded */ - -static int -lnet_try_match_md (int index, int op_mask, lnet_process_id_t src, - unsigned int rlength, unsigned int roffset, - __u64 match_bits, lnet_libmd_t *md, lnet_msg_t *msg, - unsigned int *mlength_out, unsigned int *offset_out) -{ - /* ALWAYS called holding the LNET_LOCK, and can't LNET_UNLOCK; - * lnet_match_blocked_msg() relies on this to avoid races */ - unsigned int offset; - unsigned int mlength; - lnet_me_t *me = md->md_me; - - /* mismatched MD op */ - if ((md->md_options & op_mask) == 0) - return LNET_MATCHMD_NONE; - - /* MD exhausted */ - if (lnet_md_exhausted(md)) - return LNET_MATCHMD_NONE; - - /* mismatched ME nid/pid? */ - if (me->me_match_id.nid != LNET_NID_ANY && - me->me_match_id.nid != src.nid) - return LNET_MATCHMD_NONE; - - if (me->me_match_id.pid != LNET_PID_ANY && - me->me_match_id.pid != src.pid) - return LNET_MATCHMD_NONE; - - /* mismatched ME matchbits? */ - if (((me->me_match_bits ^ match_bits) & ~me->me_ignore_bits) != 0) - return LNET_MATCHMD_NONE; - - /* Hurrah! This _is_ a match; check it out... 
*/ - - if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0) - offset = md->md_offset; - else - offset = roffset; - - if ((md->md_options & LNET_MD_MAX_SIZE) != 0) { - mlength = md->md_max_size; - LASSERT (md->md_offset + mlength <= md->md_length); - } else { - mlength = md->md_length - offset; - } - - if (rlength <= mlength) { /* fits in allowed space */ - mlength = rlength; - } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) { - /* this packet _really_ is too big */ - CERROR("Matching packet from %s, match "LPU64 - " length %d too big: %d left, %d allowed\n", - libcfs_id2str(src), match_bits, rlength, - md->md_length - offset, mlength); - - return LNET_MATCHMD_DROP; - } - - /* Commit to this ME/MD */ - CDEBUG(D_NET, "Incoming %s index %x from %s of " - "length %d/%d into md "LPX64" [%d] + %d\n", - (op_mask == LNET_MD_OP_PUT) ? "put" : "get", - index, libcfs_id2str(src), mlength, rlength, - md->md_lh.lh_cookie, md->md_niov, offset); - - lnet_commit_md(md, msg); - md->md_offset = offset + mlength; - - /* NB Caller will set ev.type and ev.hdr_data */ - msg->msg_ev.initiator = src; - msg->msg_ev.pt_index = index; - msg->msg_ev.match_bits = match_bits; - msg->msg_ev.rlength = rlength; - msg->msg_ev.mlength = mlength; - msg->msg_ev.offset = offset; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - *offset_out = offset; - *mlength_out = mlength; - - /* Auto-unlink NOW, so the ME gets unlinked if required. - * We bumped md->md_refcount above so the MD just gets flagged - * for unlink when it is finalized. 
*/ - if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 && - lnet_md_exhausted(md)) { - lnet_md_unlink(md); - } - - return LNET_MATCHMD_OK; -} - -static int -lnet_match_md(int index, int op_mask, lnet_process_id_t src, - unsigned int rlength, unsigned int roffset, - __u64 match_bits, lnet_msg_t *msg, - unsigned int *mlength_out, unsigned int *offset_out, - lnet_libmd_t **md_out) -{ - lnet_portal_t *ptl = &the_lnet.ln_portals[index]; - lnet_me_t *me; - lnet_me_t *tmp; - lnet_libmd_t *md; - int rc; - - CDEBUG (D_NET, "Request from %s of length %d into portal %d " - "MB="LPX64"\n", libcfs_id2str(src), rlength, index, match_bits); - - if (index < 0 || index >= the_lnet.ln_nportals) { - CERROR("Invalid portal %d not in [0-%d]\n", - index, the_lnet.ln_nportals); - return LNET_MATCHMD_DROP; - } - - list_for_each_entry_safe (me, tmp, &ptl->ptl_ml, me_list) { - md = me->me_md; - - /* ME attached but MD not attached yet */ - if (md == NULL) - continue; - - LASSERT (me == md->md_me); - - rc = lnet_try_match_md(index, op_mask, src, rlength, - roffset, match_bits, md, msg, - mlength_out, offset_out); - switch (rc) { - default: - LBUG(); - - case LNET_MATCHMD_NONE: - continue; - - case LNET_MATCHMD_OK: - *md_out = md; - return LNET_MATCHMD_OK; - - case LNET_MATCHMD_DROP: - return LNET_MATCHMD_DROP; - } - /* not reached */ - } - - if (op_mask == LNET_MD_OP_GET || - (ptl->ptl_options & LNET_PTL_LAZY) == 0) - return LNET_MATCHMD_DROP; - - return LNET_MATCHMD_NONE; -} - -int -lnet_fail_nid (lnet_nid_t nid, unsigned int threshold) -{ - lnet_test_peer_t *tp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - - LASSERT (the_lnet.ln_init); - - if (threshold != 0) { - /* Adding a new entry */ - LIBCFS_ALLOC(tp, sizeof(*tp)); - if (tp == NULL) - return -ENOMEM; - - tp->tp_nid = nid; - tp->tp_threshold = threshold; - - LNET_LOCK(); - list_add_tail (&tp->tp_list, &the_lnet.ln_test_peers); - LNET_UNLOCK(); - return 0; - } - - /* removing entries */ - 
CFS_INIT_LIST_HEAD (&cull); - - LNET_LOCK(); - - list_for_each_safe (el, next, &the_lnet.ln_test_peers) { - tp = list_entry (el, lnet_test_peer_t, tp_list); - - if (tp->tp_threshold == 0 || /* needs culling anyway */ - nid == LNET_NID_ANY || /* removing all entries */ - tp->tp_nid == nid) /* matched this one */ - { - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - - LNET_UNLOCK(); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lnet_test_peer_t, tp_list); - - list_del (&tp->tp_list); - LIBCFS_FREE(tp, sizeof (*tp)); - } - return 0; -} - -static int -fail_peer (lnet_nid_t nid, int outgoing) -{ - lnet_test_peer_t *tp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - int fail = 0; - - CFS_INIT_LIST_HEAD (&cull); - - LNET_LOCK(); - - list_for_each_safe (el, next, &the_lnet.ln_test_peers) { - tp = list_entry (el, lnet_test_peer_t, tp_list); - - if (tp->tp_threshold == 0) { - /* zombie entry */ - if (outgoing) { - /* only cull zombies on outgoing tests, - * since we may be at interrupt priority on - * incoming messages. 
*/ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - continue; - } - - if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */ - nid == tp->tp_nid) { /* fail this peer */ - fail = 1; - - if (tp->tp_threshold != LNET_MD_THRESH_INF) { - tp->tp_threshold--; - if (outgoing && - tp->tp_threshold == 0) { - /* see above */ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - break; - } - } - - LNET_UNLOCK (); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lnet_test_peer_t, tp_list); - list_del (&tp->tp_list); - - LIBCFS_FREE(tp, sizeof (*tp)); - } - - return (fail); -} - -unsigned int -lnet_iov_nob (unsigned int niov, struct iovec *iov) -{ - unsigned int nob = 0; - - while (niov-- > 0) - nob += (iov++)->iov_len; - - return (nob); -} - -void -lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, unsigned int doffset, - unsigned int nsiov, struct iovec *siov, unsigned int soffset, - unsigned int nob) -{ - /* NB diov, siov are READ-ONLY */ - unsigned int this_nob; - - if (nob == 0) - return; - - /* skip complete frags before 'doffset' */ - LASSERT (ndiov > 0); - while (doffset >= diov->iov_len) { - doffset -= diov->iov_len; - diov++; - ndiov--; - LASSERT (ndiov > 0); - } - - /* skip complete frags before 'soffset' */ - LASSERT (nsiov > 0); - while (soffset >= siov->iov_len) { - soffset -= siov->iov_len; - siov++; - nsiov--; - LASSERT (nsiov > 0); - } - - do { - LASSERT (ndiov > 0); - LASSERT (nsiov > 0); - this_nob = MIN(diov->iov_len - doffset, - siov->iov_len - soffset); - this_nob = MIN(this_nob, nob); - - memcpy ((char *)diov->iov_base + doffset, - (char *)siov->iov_base + soffset, this_nob); - nob -= this_nob; - - if (diov->iov_len > doffset + this_nob) { - doffset += this_nob; - } else { - diov++; - ndiov--; - doffset = 0; - } - - if (siov->iov_len > soffset + this_nob) { - soffset += this_nob; - } else { - siov++; - nsiov--; - soffset = 0; - } - } while (nob > 0); -} - -int -lnet_extract_iov (int dst_niov, 
struct iovec *dst, - int src_niov, struct iovec *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return (niov); - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} - -#ifndef __KERNEL__ -unsigned int -lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov) -{ - LASSERT (0); - return (0); -} - -void -lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov, unsigned int doffset, - unsigned int nskiov, lnet_kiov_t *skiov, unsigned int soffset, - unsigned int nob) -{ - LASSERT (0); -} - -void -lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int nob) -{ - LASSERT (0); -} - -void -lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nob) -{ - LASSERT (0); -} - -int -lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) -{ - LASSERT (0); -} - -#else /* __KERNEL__ */ - -unsigned int -lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov) -{ - unsigned int nob = 0; - - while (niov-- > 0) - nob += (kiov++)->kiov_len; - - return (nob); -} - -void -lnet_copy_kiov2kiov 
(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset, - unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset, - unsigned int nob) -{ - /* NB diov, siov are READ-ONLY */ - unsigned int this_nob; - char *daddr = NULL; - char *saddr = NULL; - - if (nob == 0) - return; - - LASSERT (!in_interrupt ()); - - LASSERT (ndiov > 0); - while (doffset >= diov->kiov_len) { - doffset -= diov->kiov_len; - diov++; - ndiov--; - LASSERT (ndiov > 0); - } - - LASSERT (nsiov > 0); - while (soffset >= siov->kiov_len) { - soffset -= siov->kiov_len; - siov++; - nsiov--; - LASSERT (nsiov > 0); - } - - do { - LASSERT (ndiov > 0); - LASSERT (nsiov > 0); - this_nob = MIN(diov->kiov_len - doffset, - siov->kiov_len - soffset); - this_nob = MIN(this_nob, nob); - - if (daddr == NULL) - daddr = ((char *)cfs_kmap(diov->kiov_page)) + - diov->kiov_offset + doffset; - if (saddr == NULL) - saddr = ((char *)cfs_kmap(siov->kiov_page)) + - siov->kiov_offset + soffset; - - /* Vanishing risk of kmap deadlock when mapping 2 pages. 
- * However in practice at least one of the kiovs will be mapped - * kernel pages and the map/unmap will be NOOPs */ - - memcpy (daddr, saddr, this_nob); - nob -= this_nob; - - if (diov->kiov_len > doffset + this_nob) { - daddr += this_nob; - doffset += this_nob; - } else { - cfs_kunmap(diov->kiov_page); - daddr = NULL; - diov++; - ndiov--; - doffset = 0; - } - - if (siov->kiov_len > soffset + this_nob) { - saddr += this_nob; - soffset += this_nob; - } else { - cfs_kunmap(siov->kiov_page); - saddr = NULL; - siov++; - nsiov--; - soffset = 0; - } - } while (nob > 0); - - if (daddr != NULL) - cfs_kunmap(diov->kiov_page); - if (saddr != NULL) - cfs_kunmap(siov->kiov_page); -} - -void -lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int nob) -{ - /* NB iov, kiov are READ-ONLY */ - unsigned int this_nob; - char *addr = NULL; - - if (nob == 0) - return; - - LASSERT (!in_interrupt ()); - - LASSERT (niov > 0); - while (iovoffset >= iov->iov_len) { - iovoffset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - LASSERT (nkiov > 0); - while (kiovoffset >= kiov->kiov_len) { - kiovoffset -= kiov->kiov_len; - kiov++; - nkiov--; - LASSERT (nkiov > 0); - } - - do { - LASSERT (niov > 0); - LASSERT (nkiov > 0); - this_nob = MIN(iov->iov_len - iovoffset, - kiov->kiov_len - kiovoffset); - this_nob = MIN(this_nob, nob); - - if (addr == NULL) - addr = ((char *)cfs_kmap(kiov->kiov_page)) + - kiov->kiov_offset + kiovoffset; - - memcpy ((char *)iov->iov_base + iovoffset, addr, this_nob); - nob -= this_nob; - - if (iov->iov_len > iovoffset + this_nob) { - iovoffset += this_nob; - } else { - iov++; - niov--; - iovoffset = 0; - } - - if (kiov->kiov_len > kiovoffset + this_nob) { - addr += this_nob; - kiovoffset += this_nob; - } else { - cfs_kunmap(kiov->kiov_page); - addr = NULL; - kiov++; - nkiov--; - kiovoffset = 0; - } - - } while (nob > 0); - - if (addr != NULL) - 
cfs_kunmap(kiov->kiov_page); -} - -void -lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nob) -{ - /* NB kiov, iov are READ-ONLY */ - unsigned int this_nob; - char *addr = NULL; - - if (nob == 0) - return; - - LASSERT (!in_interrupt ()); - - LASSERT (nkiov > 0); - while (kiovoffset >= kiov->kiov_len) { - kiovoffset -= kiov->kiov_len; - kiov++; - nkiov--; - LASSERT (nkiov > 0); - } - - LASSERT (niov > 0); - while (iovoffset >= iov->iov_len) { - iovoffset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - do { - LASSERT (nkiov > 0); - LASSERT (niov > 0); - this_nob = MIN(kiov->kiov_len - kiovoffset, - iov->iov_len - iovoffset); - this_nob = MIN(this_nob, nob); - - if (addr == NULL) - addr = ((char *)cfs_kmap(kiov->kiov_page)) + - kiov->kiov_offset + kiovoffset; - - memcpy (addr, (char *)iov->iov_base + iovoffset, this_nob); - nob -= this_nob; - - if (kiov->kiov_len > kiovoffset + this_nob) { - addr += this_nob; - kiovoffset += this_nob; - } else { - cfs_kunmap(kiov->kiov_page); - addr = NULL; - kiov++; - nkiov--; - kiovoffset = 0; - } - - if (iov->iov_len > iovoffset + this_nob) { - iovoffset += this_nob; - } else { - iov++; - niov--; - iovoffset = 0; - } - } while (nob > 0); - - if (addr != NULL) - cfs_kunmap(kiov->kiov_page); -} - -int -lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. 
- * NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = src->kiov_len - offset; - dst->kiov_page = src->kiov_page; - dst->kiov_offset = src->kiov_offset + offset; - - if (len <= frag_len) { - dst->kiov_len = len; - LASSERT (dst->kiov_offset + dst->kiov_len <= CFS_PAGE_SIZE); - return (niov); - } - - dst->kiov_len = frag_len; - LASSERT (dst->kiov_offset + dst->kiov_len <= CFS_PAGE_SIZE); - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -#endif - -void -lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - unsigned int niov = 0; - struct iovec *iov = NULL; - lnet_kiov_t *kiov = NULL; - int rc; - - LASSERT (!in_interrupt ()); - LASSERT (mlen == 0 || msg != NULL); - - if (msg != NULL) { - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_sending); - LASSERT(rlen == msg->msg_len); - LASSERT(mlen <= msg->msg_len); - - msg->msg_wanted = mlen; - msg->msg_offset = offset; - msg->msg_receiving = 0; - - if (mlen != 0) { - niov = msg->msg_niov; - iov = msg->msg_iov; - kiov = msg->msg_kiov; - - LASSERT (niov > 0); - LASSERT ((iov == NULL) != (kiov == NULL)); - } - } - - rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed, - niov, iov, kiov, offset, mlen, rlen); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -int -lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2) -{ - if (p1->lp_txqnob < p2->lp_txqnob) - return 1; - - if (p1->lp_txqnob > p2->lp_txqnob) - return -1; - - if (p1->lp_txcredits > p2->lp_txcredits) - return 1; - - if (p1->lp_txcredits < p2->lp_txcredits) - return -1; - - return 0; -} - - -void 
-lnet_setpayloadbuffer(lnet_msg_t *msg) -{ - lnet_libmd_t *md = msg->msg_md; - - LASSERT (msg->msg_len > 0); - LASSERT (!msg->msg_routing); - LASSERT (md != NULL); - LASSERT (msg->msg_niov == 0); - LASSERT (msg->msg_iov == NULL); - LASSERT (msg->msg_kiov == NULL); - - msg->msg_niov = md->md_niov; - if ((md->md_options & LNET_MD_KIOV) != 0) - msg->msg_kiov = md->md_iov.kiov; - else - msg->msg_iov = md->md_iov.iov; -} - -void -lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target, - unsigned int offset, unsigned int len) -{ - msg->msg_type = type; - msg->msg_target = target; - msg->msg_len = len; - msg->msg_offset = offset; - - if (len != 0) - lnet_setpayloadbuffer(msg); - - memset (&msg->msg_hdr, 0, sizeof (msg->msg_hdr)); - msg->msg_hdr.type = cpu_to_le32(type); - msg->msg_hdr.dest_nid = cpu_to_le64(target.nid); - msg->msg_hdr.dest_pid = cpu_to_le32(target.pid); - /* src_nid will be set later */ - msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid); - msg->msg_hdr.payload_length = cpu_to_le32(len); -} - -void -lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg) -{ - void *priv = msg->msg_private; - int rc; - - LASSERT (!in_interrupt ()); - LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND || - (msg->msg_txcredit && msg->msg_peertxcredit)); - - rc = (ni->ni_lnd->lnd_send)(ni, priv, msg); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -int -lnet_eager_recv_locked(lnet_msg_t *msg) -{ - lnet_peer_t *peer; - lnet_ni_t *ni; - int rc = 0; - - LASSERT (!msg->msg_delayed); - msg->msg_delayed = 1; - - LASSERT (msg->msg_receiving); - LASSERT (!msg->msg_sending); - - peer = msg->msg_rxpeer; - ni = peer->lp_ni; - - if (ni->ni_lnd->lnd_eager_recv != NULL) { - LNET_UNLOCK(); - - rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg, - &msg->msg_private); - if (rc != 0) { - CERROR("recv from %s / send to %s aborted: " - "eager_recv failed %d\n", - libcfs_nid2str(peer->lp_nid), - libcfs_id2str(msg->msg_target), rc); - LASSERT (rc < 0); /* required by my callers 
*/ - } - - LNET_LOCK(); - } - - return rc; -} - -int -lnet_post_send_locked (lnet_msg_t *msg, int do_send) -{ - /* lnet_send is going to LNET_UNLOCK immediately after this, so it sets - * do_send FALSE and I don't do the unlock/send/lock bit. I return - * EAGAIN if msg blocked and 0 if sent or OK to send */ - lnet_peer_t *lp = msg->msg_txpeer; - lnet_ni_t *ni = lp->lp_ni; - - /* non-lnet_send() callers have checked before */ - LASSERT (!do_send || msg->msg_delayed); - LASSERT (!msg->msg_receiving); - - if (!msg->msg_peertxcredit) { - LASSERT ((lp->lp_txcredits < 0) == !list_empty(&lp->lp_txq)); - - msg->msg_peertxcredit = 1; - lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t); - lp->lp_txcredits--; - - if (lp->lp_txcredits < lp->lp_mintxcredits) - lp->lp_mintxcredits = lp->lp_txcredits; - - if (lp->lp_txcredits < 0) { - msg->msg_delayed = 1; - list_add_tail (&msg->msg_list, &lp->lp_txq); - return EAGAIN; - } - } - - if (!msg->msg_txcredit) { - LASSERT ((ni->ni_txcredits < 0) == !list_empty(&ni->ni_txq)); - - msg->msg_txcredit = 1; - ni->ni_txcredits--; - - if (ni->ni_txcredits < ni->ni_mintxcredits) - ni->ni_mintxcredits = ni->ni_txcredits; - - if (ni->ni_txcredits < 0) { - msg->msg_delayed = 1; - list_add_tail (&msg->msg_list, &ni->ni_txq); - return EAGAIN; - } - } - - if (do_send) { - LNET_UNLOCK(); - lnet_ni_send(ni, msg); - LNET_LOCK(); - } - return 0; -} - -#ifdef __KERNEL__ -static void -lnet_commit_routedmsg (lnet_msg_t *msg) -{ - /* ALWAYS called holding the LNET_LOCK */ - LASSERT (msg->msg_routing); - - the_lnet.ln_counters.msgs_alloc++; - if (the_lnet.ln_counters.msgs_alloc > - the_lnet.ln_counters.msgs_max) - the_lnet.ln_counters.msgs_max = - the_lnet.ln_counters.msgs_alloc; - - the_lnet.ln_counters.route_count++; - the_lnet.ln_counters.route_length += msg->msg_len; - - LASSERT (!msg->msg_onactivelist); - msg->msg_onactivelist = 1; - list_add (&msg->msg_activelist, &the_lnet.ln_active_msgs); -} - -lnet_rtrbufpool_t * -lnet_msg2bufpool(lnet_msg_t *msg) 
-{ - lnet_rtrbufpool_t *rbp = &the_lnet.ln_rtrpools[0]; - - LASSERT (msg->msg_len <= LNET_MTU); - while (msg->msg_len > rbp->rbp_npages * CFS_PAGE_SIZE) { - rbp++; - LASSERT (rbp < &the_lnet.ln_rtrpools[LNET_NRBPOOLS]); - } - - return rbp; -} - -int -lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv) -{ - /* lnet_parse is going to LNET_UNLOCK immediately after this, so it - * sets do_recv FALSE and I don't do the unlock/send/lock bit. I - * return EAGAIN if msg blocked and 0 if sent or OK to send */ - lnet_peer_t *lp = msg->msg_rxpeer; - lnet_rtrbufpool_t *rbp; - lnet_rtrbuf_t *rb; - - LASSERT (msg->msg_iov == NULL); - LASSERT (msg->msg_kiov == NULL); - LASSERT (msg->msg_niov == 0); - LASSERT (msg->msg_routing); - LASSERT (msg->msg_receiving); - LASSERT (!msg->msg_sending); - - /* non-lnet_parse callers only send delayed messages */ - LASSERT (!do_recv || msg->msg_delayed); - - if (!msg->msg_peerrtrcredit) { - LASSERT ((lp->lp_rtrcredits < 0) == !list_empty(&lp->lp_rtrq)); - - msg->msg_peerrtrcredit = 1; - lp->lp_rtrcredits--; - if (lp->lp_rtrcredits < lp->lp_minrtrcredits) - lp->lp_minrtrcredits = lp->lp_rtrcredits; - - if (lp->lp_rtrcredits < 0) { - /* must have checked eager_recv before here */ - LASSERT (msg->msg_delayed); - list_add_tail(&msg->msg_list, &lp->lp_rtrq); - return EAGAIN; - } - } - - rbp = lnet_msg2bufpool(msg); - - if (!msg->msg_rtrcredit) { - LASSERT ((rbp->rbp_credits < 0) == !list_empty(&rbp->rbp_msgs)); - - msg->msg_rtrcredit = 1; - rbp->rbp_credits--; - if (rbp->rbp_credits < rbp->rbp_mincredits) - rbp->rbp_mincredits = rbp->rbp_credits; - - if (rbp->rbp_credits < 0) { - /* must have checked eager_recv before here */ - LASSERT (msg->msg_delayed); - list_add_tail(&msg->msg_list, &rbp->rbp_msgs); - return EAGAIN; - } - } - - LASSERT (!list_empty(&rbp->rbp_bufs)); - rb = list_entry(rbp->rbp_bufs.next, lnet_rtrbuf_t, rb_list); - list_del(&rb->rb_list); - - msg->msg_niov = rbp->rbp_npages; - msg->msg_kiov = &rb->rb_kiov[0]; - - if 
(do_recv) { - LNET_UNLOCK(); - lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1, - 0, msg->msg_len, msg->msg_len); - LNET_LOCK(); - } - return 0; -} -#endif - -void -lnet_return_credits_locked (lnet_msg_t *msg) -{ - lnet_peer_t *txpeer = msg->msg_txpeer; - lnet_peer_t *rxpeer = msg->msg_rxpeer; - lnet_msg_t *msg2; - lnet_ni_t *ni; - - if (msg->msg_txcredit) { - /* give back NI txcredits */ - msg->msg_txcredit = 0; - ni = txpeer->lp_ni; - - LASSERT((ni->ni_txcredits < 0) == !list_empty(&ni->ni_txq)); - - ni->ni_txcredits++; - if (ni->ni_txcredits <= 0) { - msg2 = list_entry(ni->ni_txq.next, lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - LASSERT(msg2->msg_txpeer->lp_ni == ni); - LASSERT(msg2->msg_delayed); - - (void) lnet_post_send_locked(msg2, 1); - } - } - - if (msg->msg_peertxcredit) { - /* give back peer txcredits */ - msg->msg_peertxcredit = 0; - - LASSERT((txpeer->lp_txcredits < 0) == !list_empty(&txpeer->lp_txq)); - - txpeer->lp_txqnob -= msg->msg_len + sizeof(lnet_hdr_t); - LASSERT (txpeer->lp_txqnob >= 0); - - txpeer->lp_txcredits++; - if (txpeer->lp_txcredits <= 0) { - msg2 = list_entry(txpeer->lp_txq.next, - lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - LASSERT (msg2->msg_txpeer == txpeer); - LASSERT (msg2->msg_delayed); - - (void) lnet_post_send_locked(msg2, 1); - } - } - - if (txpeer != NULL) { - msg->msg_txpeer = NULL; - lnet_peer_decref_locked(txpeer); - } - -#ifdef __KERNEL__ - if (msg->msg_rtrcredit) { - /* give back global router credits */ - lnet_rtrbuf_t *rb; - lnet_rtrbufpool_t *rbp; - - /* NB If a msg ever blocks for a buffer in rbp_msgs, it stays - * there until it gets one allocated, or aborts the wait - * itself */ - LASSERT (msg->msg_kiov != NULL); - - rb = list_entry(msg->msg_kiov, lnet_rtrbuf_t, rb_kiov[0]); - rbp = rb->rb_pool; - LASSERT (rbp == lnet_msg2bufpool(msg)); - - msg->msg_kiov = NULL; - msg->msg_rtrcredit = 0; - - LASSERT((rbp->rbp_credits < 0) == !list_empty(&rbp->rbp_msgs)); - LASSERT((rbp->rbp_credits > 0) 
== !list_empty(&rbp->rbp_bufs)); - - list_add(&rb->rb_list, &rbp->rbp_bufs); - rbp->rbp_credits++; - if (rbp->rbp_credits <= 0) { - msg2 = list_entry(rbp->rbp_msgs.next, - lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - (void) lnet_post_routed_recv_locked(msg2, 1); - } - } - - if (msg->msg_peerrtrcredit) { - /* give back peer router credits */ - msg->msg_peerrtrcredit = 0; - - LASSERT((rxpeer->lp_rtrcredits < 0) == !list_empty(&rxpeer->lp_rtrq)); - - rxpeer->lp_rtrcredits++; - if (rxpeer->lp_rtrcredits <= 0) { - msg2 = list_entry(rxpeer->lp_rtrq.next, - lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - (void) lnet_post_routed_recv_locked(msg2, 1); - } - } -#else - LASSERT (!msg->msg_rtrcredit); - LASSERT (!msg->msg_peerrtrcredit); -#endif - if (rxpeer != NULL) { - msg->msg_rxpeer = NULL; - lnet_peer_decref_locked(rxpeer); - } -} - -int -lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg) -{ - lnet_nid_t dst_nid = msg->msg_target.nid; - lnet_ni_t *src_ni; - lnet_ni_t *local_ni; - lnet_remotenet_t *rnet; - lnet_route_t *route; - lnet_route_t *best_route; - struct list_head *tmp; - lnet_peer_t *lp; - lnet_peer_t *lp2; - int rc; - - LASSERT (msg->msg_txpeer == NULL); - LASSERT (!msg->msg_sending); - LASSERT (!msg->msg_target_is_router); - LASSERT (!msg->msg_receiving); - - msg->msg_sending = 1; - - /* NB! ni != NULL == interface pre-determined (ACK/REPLY) */ - - LNET_LOCK(); - - if (the_lnet.ln_shutdown) { - LNET_UNLOCK(); - return -ESHUTDOWN; - } - - if (src_nid == LNET_NID_ANY) { - src_ni = NULL; - } else { - src_ni = lnet_nid2ni_locked(src_nid); - if (src_ni == NULL) { - LNET_UNLOCK(); - CERROR("Can't send to %s: src %s is not a local nid\n", - libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid)); - return -EINVAL; - } - LASSERT (!msg->msg_routing); - } - - /* Is this for someone on a local network? 
*/ - local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid)); - - if (local_ni != NULL) { - if (src_ni == NULL) { - src_ni = local_ni; - src_nid = src_ni->ni_nid; - } else if (src_ni == local_ni) { - lnet_ni_decref_locked(local_ni); - } else { - lnet_ni_decref_locked(local_ni); - lnet_ni_decref_locked(src_ni); - LNET_UNLOCK(); - CERROR("no route to %s via from %s\n", - libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid)); - return -EINVAL; - } - - LASSERT (src_nid != LNET_NID_ANY); - - if (!msg->msg_routing) { - src_nid = lnet_ptlcompat_srcnid(src_nid, dst_nid); - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - } - - if (src_ni == the_lnet.ln_loni) { - /* No send credit hassles with LOLND */ - LNET_UNLOCK(); - lnet_ni_send(src_ni, msg); - lnet_ni_decref(src_ni); - return 0; - } - - rc = lnet_nid2peer_locked(&lp, dst_nid); - lnet_ni_decref_locked(src_ni); /* lp has ref on src_ni; lose mine */ - if (rc != 0) { - LNET_UNLOCK(); - CERROR("Error %d finding peer %s\n", rc, - libcfs_nid2str(dst_nid)); - /* ENOMEM or shutting down */ - return rc; - } - LASSERT (lp->lp_ni == src_ni); - } else { - /* sending to a remote network */ - rnet = lnet_find_net_locked(LNET_NIDNET(dst_nid)); - if (rnet == NULL) { - if (src_ni != NULL) - lnet_ni_decref_locked(src_ni); - LNET_UNLOCK(); - CERROR("No route to %s\n", libcfs_id2str(msg->msg_target)); - return -EHOSTUNREACH; - } - - /* Find the best gateway I can use */ - lp = NULL; - best_route = NULL; - list_for_each(tmp, &rnet->lrn_routes) { - route = list_entry(tmp, lnet_route_t, lr_list); - lp2 = route->lr_gateway; - - if (lp2->lp_alive && - (src_ni == NULL || lp2->lp_ni == src_ni) && - (lp == NULL || lnet_compare_routers(lp2, lp) > 0)) { - best_route = route; - lp = lp2; - } - } - - if (lp == NULL) { - if (src_ni != NULL) - lnet_ni_decref_locked(src_ni); - LNET_UNLOCK(); - CERROR("No route to %s (all routers down)\n", - libcfs_id2str(msg->msg_target)); - return -EHOSTUNREACH; - } - - /* Place selected route at the end of the route list to 
ensure - * fairness; everything else being equal... */ - list_del(&best_route->lr_list); - list_add_tail(&best_route->lr_list, &rnet->lrn_routes); - - if (src_ni == NULL) { - src_ni = lp->lp_ni; - src_nid = src_ni->ni_nid; - } else { - LASSERT (src_ni == lp->lp_ni); - lnet_ni_decref_locked(src_ni); - } - - lnet_peer_addref_locked(lp); - - LASSERT (src_nid != LNET_NID_ANY); - - if (!msg->msg_routing) { - /* I'm the source and now I know which NI to send on */ - src_nid = lnet_ptlcompat_srcnid(src_nid, dst_nid); - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - } - - msg->msg_target_is_router = 1; - msg->msg_target.nid = lp->lp_nid; - msg->msg_target.pid = LUSTRE_SRV_LNET_PID; - } - - /* 'lp' is our best choice of peer */ - - LASSERT (!msg->msg_peertxcredit); - LASSERT (!msg->msg_txcredit); - LASSERT (msg->msg_txpeer == NULL); - - msg->msg_txpeer = lp; /* msg takes my ref on lp */ - - rc = lnet_post_send_locked(msg, 0); - LNET_UNLOCK(); - - if (rc == 0) - lnet_ni_send(src_ni, msg); - - return 0; -} - -static void -lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg) -{ - /* ALWAYS called holding the LNET_LOCK */ - /* Here, we commit the MD to a network OP by marking it busy and - * decrementing its threshold. Come what may, the network "owns" - * the MD until a call to lnet_finalize() signals completion. 
*/ - LASSERT (!msg->msg_routing); - - msg->msg_md = md; - - md->md_refcount++; - if (md->md_threshold != LNET_MD_THRESH_INF) { - LASSERT (md->md_threshold > 0); - md->md_threshold--; - } - - the_lnet.ln_counters.msgs_alloc++; - if (the_lnet.ln_counters.msgs_alloc > - the_lnet.ln_counters.msgs_max) - the_lnet.ln_counters.msgs_max = - the_lnet.ln_counters.msgs_alloc; - - LASSERT (!msg->msg_onactivelist); - msg->msg_onactivelist = 1; - list_add (&msg->msg_activelist, &the_lnet.ln_active_msgs); -} - -static void -lnet_drop_message (lnet_ni_t *ni, void *private, unsigned int nob) -{ - LNET_LOCK(); - the_lnet.ln_counters.drop_count++; - the_lnet.ln_counters.drop_length += nob; - LNET_UNLOCK(); - - lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob); -} - -static void -lnet_drop_delayed_put(lnet_msg_t *msg, char *reason) -{ - LASSERT (msg->msg_md == NULL); - LASSERT (msg->msg_delayed); - LASSERT (msg->msg_rxpeer != NULL); - LASSERT (msg->msg_hdr.type == LNET_MSG_PUT); - - CWARN("Dropping delayed PUT from %s portal %d match "LPU64 - " offset %d length %d: %s\n", - libcfs_id2str((lnet_process_id_t){ - .nid = msg->msg_hdr.src_nid, - .pid = msg->msg_hdr.src_pid}), - msg->msg_hdr.msg.put.ptl_index, - msg->msg_hdr.msg.put.match_bits, - msg->msg_hdr.msg.put.offset, - msg->msg_hdr.payload_length, - reason); - - /* NB I can't drop msg's ref on msg_rxpeer until after I've - * called lnet_drop_message(), so I just hang onto msg as well - * until that's done */ - - lnet_drop_message(msg->msg_rxpeer->lp_ni, - msg->msg_private, msg->msg_len); - - LNET_LOCK(); - - lnet_peer_decref_locked(msg->msg_rxpeer); - msg->msg_rxpeer = NULL; - - lnet_msg_free(msg); - - LNET_UNLOCK(); -} - -int -LNetSetLazyPortal(int portal) -{ - lnet_portal_t *ptl = &the_lnet.ln_portals[portal]; - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - CDEBUG(D_NET, "Setting portal %d lazy\n", portal); - - LNET_LOCK(); - - ptl->ptl_options |= LNET_PTL_LAZY; - - LNET_UNLOCK(); - - return 0; -} - -int 
-LNetClearLazyPortal(int portal) -{ - struct list_head zombies; - lnet_portal_t *ptl = &the_lnet.ln_portals[portal]; - lnet_msg_t *msg; - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - LNET_LOCK(); - - if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) { - LNET_UNLOCK(); - return 0; - } - - if (the_lnet.ln_shutdown) - CWARN ("Active lazy portal %d on exit\n", portal); - else - CDEBUG (D_NET, "clearing portal %d lazy\n", portal); - - /* grab all the blocked messages atomically */ - list_add(&zombies, &ptl->ptl_msgq); - list_del_init(&ptl->ptl_msgq); - - ptl->ptl_msgq_version++; - ptl->ptl_options &= ~LNET_PTL_LAZY; - - LNET_UNLOCK(); - - while (!list_empty(&zombies)) { - msg = list_entry(zombies.next, lnet_msg_t, msg_list); - list_del(&msg->msg_list); - - lnet_drop_delayed_put(msg, "Clearing lazy portal attr"); - } - - return 0; -} - -static void -lnet_recv_put(lnet_libmd_t *md, lnet_msg_t *msg, int delayed, - unsigned int offset, unsigned int mlength) -{ - lnet_hdr_t *hdr = &msg->msg_hdr; - - LNET_LOCK(); - - the_lnet.ln_counters.recv_count++; - the_lnet.ln_counters.recv_length += mlength; - - LNET_UNLOCK(); - - if (mlength != 0) - lnet_setpayloadbuffer(msg); - - msg->msg_ev.type = LNET_EVENT_PUT; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.hdr_data = hdr->msg.put.hdr_data; - - /* Must I ACK? 
If so I'll grab the ack_wmd out of the header and put - * it back into the ACK during lnet_finalize() */ - msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - (md->md_options & LNET_MD_ACK_DISABLE) == 0); - - lnet_ni_recv(msg->msg_rxpeer->lp_ni, - msg->msg_private, - msg, delayed, offset, mlength, - hdr->payload_length); -} - -/* called with LNET_LOCK held */ -void -lnet_match_blocked_msg(lnet_libmd_t *md) -{ - CFS_LIST_HEAD (drops); - CFS_LIST_HEAD (matches); - struct list_head *tmp; - struct list_head *entry; - lnet_msg_t *msg; - lnet_me_t *me = md->md_me; - lnet_portal_t *ptl = &the_lnet.ln_portals[me->me_portal]; - - LASSERT (me->me_portal < the_lnet.ln_nportals); - - if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) { - LASSERT (list_empty(&ptl->ptl_msgq)); - return; - } - - LASSERT (md->md_refcount == 0); /* a brand new MD */ - - list_for_each_safe (entry, tmp, &ptl->ptl_msgq) { - int rc; - int index; - unsigned int mlength; - unsigned int offset; - lnet_hdr_t *hdr; - lnet_process_id_t src; - - msg = list_entry(entry, lnet_msg_t, msg_list); - - LASSERT (msg->msg_delayed); - - hdr = &msg->msg_hdr; - index = hdr->msg.put.ptl_index; - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - rc = lnet_try_match_md(index, LNET_MD_OP_PUT, src, - hdr->payload_length, - hdr->msg.put.offset, - hdr->msg.put.match_bits, - md, msg, &mlength, &offset); - - if (rc == LNET_MATCHMD_NONE) - continue; - - /* Hurrah! 
This _is_ a match */ - list_del(&msg->msg_list); - ptl->ptl_msgq_version++; - - if (rc == LNET_MATCHMD_OK) { - list_add_tail(&msg->msg_list, &matches); - - CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d " - "match "LPU64" offset %d length %d.\n", - libcfs_id2str(src), - hdr->msg.put.ptl_index, - hdr->msg.put.match_bits, - hdr->msg.put.offset, - hdr->payload_length); - } else { - LASSERT (rc == LNET_MATCHMD_DROP); - - list_add_tail(&msg->msg_list, &drops); - } - - if (lnet_md_exhausted(md)) - break; - } - - LNET_UNLOCK(); - - list_for_each_safe (entry, tmp, &drops) { - msg = list_entry(entry, lnet_msg_t, msg_list); - - list_del(&msg->msg_list); - - lnet_drop_delayed_put(msg, "Bad match"); - } - - list_for_each_safe (entry, tmp, &matches) { - msg = list_entry(entry, lnet_msg_t, msg_list); - - list_del(&msg->msg_list); - - /* md won't disappear under me, since each msg - * holds a ref on it */ - lnet_recv_put(md, msg, 1, - msg->msg_ev.offset, - msg->msg_ev.mlength); - } - - LNET_LOCK(); -} - -static int -lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) -{ - int rc; - int index; - lnet_hdr_t *hdr = &msg->msg_hdr; - unsigned int rlength = hdr->payload_length; - unsigned int mlength = 0; - unsigned int offset = 0; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_libmd_t *md; - - /* Convert put fields to host byte order */ - hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits); - hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index); - hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset); - - index = hdr->msg.put.ptl_index; - - LNET_LOCK(); - - rc = lnet_match_md(index, LNET_MD_OP_PUT, src, - rlength, hdr->msg.put.offset, - hdr->msg.put.match_bits, msg, - &mlength, &offset, &md); - switch (rc) { - default: - LBUG(); - - case LNET_MATCHMD_OK: - LNET_UNLOCK(); - lnet_recv_put(md, msg, 0, offset, mlength); - return 0; - - case LNET_MATCHMD_NONE: - rc = lnet_eager_recv_locked(msg); - if (rc == 0 && 
!the_lnet.ln_shutdown) { - list_add_tail(&msg->msg_list, - &the_lnet.ln_portals[index].ptl_msgq); - - the_lnet.ln_portals[index].ptl_msgq_version++; - - CDEBUG(D_NET, "Delaying PUT from %s portal %d match " - LPU64" offset %d length %d: no match \n", - libcfs_id2str(src), index, - hdr->msg.put.match_bits, - hdr->msg.put.offset, rlength); - - LNET_UNLOCK(); - return 0; - } - /* fall through */ - - case LNET_MATCHMD_DROP: - CDEBUG(D_NETERROR, - "Dropping PUT from %s portal %d match "LPU64 - " offset %d length %d: %d\n", - libcfs_id2str(src), index, - hdr->msg.put.match_bits, - hdr->msg.put.offset, rlength, rc); - LNET_UNLOCK(); - - return ENOENT; /* +ve: OK but no match */ - } -} - -static int -lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get) -{ - lnet_hdr_t *hdr = &msg->msg_hdr; - unsigned int mlength = 0; - unsigned int offset = 0; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_handle_wire_t reply_wmd; - lnet_libmd_t *md; - int rc; - - /* Convert get fields to host byte order */ - hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits); - hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index); - hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length); - hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset); - - LNET_LOCK(); - - rc = lnet_match_md(hdr->msg.get.ptl_index, LNET_MD_OP_GET, src, - hdr->msg.get.sink_length, hdr->msg.get.src_offset, - hdr->msg.get.match_bits, msg, - &mlength, &offset, &md); - if (rc == LNET_MATCHMD_DROP) { - CDEBUG(D_NETERROR, - "Dropping GET from %s portal %d match "LPU64 - " offset %d length %d\n", - libcfs_id2str(src), - hdr->msg.get.ptl_index, - hdr->msg.get.match_bits, - hdr->msg.get.src_offset, - hdr->msg.get.sink_length); - LNET_UNLOCK(); - return ENOENT; /* +ve: OK but no match */ - } - - LASSERT (rc == LNET_MATCHMD_OK); - - the_lnet.ln_counters.send_count++; - the_lnet.ln_counters.send_length += mlength; - - LNET_UNLOCK(); - - reply_wmd = 
hdr->msg.get.return_wmd; - - lnet_prep_send(msg, LNET_MSG_REPLY, src, offset, mlength); - - msg->msg_hdr.msg.reply.dst_wmd = reply_wmd; - - msg->msg_ev.type = LNET_EVENT_GET; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.hdr_data = 0; - - if (rdma_get) { - /* The LND completes the REPLY from her recv procedure */ - lnet_ni_recv(ni, msg->msg_private, msg, 0, - msg->msg_offset, msg->msg_len, msg->msg_len); - return 0; - } - - lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0); - msg->msg_receiving = 0; - - rc = lnet_send(ni->ni_nid, msg); - if (rc < 0) { - /* didn't get as far as lnet_ni_send() */ - CERROR("%s: Unable to send REPLY for GET from %s: %d\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), rc); - - lnet_finalize(ni, msg, rc); - } - - return 0; -} - -static int -lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg) -{ - void *private = msg->msg_private; - lnet_hdr_t *hdr = &msg->msg_hdr; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_libmd_t *md; - int rlength; - int mlength; - - LNET_LOCK(); - - /* NB handles only looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd); - if (md == NULL || md->md_threshold == 0) { - CDEBUG(D_NETERROR, "%s: Dropping REPLY from %s for %s " - "MD "LPX64"."LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - (md == NULL) ? 
"invalid" : "inactive", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - - LNET_UNLOCK(); - return ENOENT; /* +ve: OK but no match */ - } - - LASSERT (md->md_offset == 0); - - rlength = hdr->payload_length; - mlength = MIN(rlength, md->md_length); - - if (mlength < rlength && - (md->md_options & LNET_MD_TRUNCATE) == 0) { - CDEBUG(D_NETERROR, "%s: Dropping REPLY from %s length %d " - "for MD "LPX64" would overflow (%d)\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - rlength, hdr->msg.reply.dst_wmd.wh_object_cookie, - mlength); - LNET_UNLOCK(); - return ENOENT; /* +ve: OK but no match */ - } - - CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md "LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie); - - lnet_commit_md(md, msg); - - if (mlength != 0) - lnet_setpayloadbuffer(msg); - - msg->msg_ev.type = LNET_EVENT_REPLY; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.initiator = src; - msg->msg_ev.rlength = rlength; - msg->msg_ev.mlength = mlength; - msg->msg_ev.offset = 0; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.recv_count++; - the_lnet.ln_counters.recv_length += mlength; - - LNET_UNLOCK(); - - lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength); - return 0; -} - -static int -lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg) -{ - lnet_hdr_t *hdr = &msg->msg_hdr; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_libmd_t *md; - - /* Convert ack fields to host byte order */ - hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits); - hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength); - - LNET_LOCK(); - - /* NB handles only looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd); - if (md == NULL || md->md_threshold == 0) { - /* Don't moan; 
this is expected */ - CDEBUG(D_NET, - "%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - (md == NULL) ? "invalid" : "inactive", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie); - LNET_UNLOCK(); - return ENOENT; /* +ve! */ - } - - CDEBUG(D_NET, "%s: ACK from %s into md "LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - hdr->msg.ack.dst_wmd.wh_object_cookie); - - lnet_commit_md(md, msg); - - msg->msg_ev.type = LNET_EVENT_ACK; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.initiator = src; - msg->msg_ev.mlength = hdr->msg.ack.mlength; - msg->msg_ev.match_bits = hdr->msg.ack.match_bits; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.recv_count++; - - LNET_UNLOCK(); - - lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len); - return 0; -} - -char * -lnet_msgtyp2str (int type) -{ - switch (type) { - case LNET_MSG_ACK: - return ("ACK"); - case LNET_MSG_PUT: - return ("PUT"); - case LNET_MSG_GET: - return ("GET"); - case LNET_MSG_REPLY: - return ("REPLY"); - case LNET_MSG_HELLO: - return ("HELLO"); - default: - return ("<UNKNOWN>"); - } -} - -void -lnet_print_hdr(lnet_hdr_t * hdr) -{ - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_process_id_t dst = {/* .nid = */ hdr->dest_nid, - /* .pid = */ hdr->dest_pid}; - char *type_str = lnet_msgtyp2str (hdr->type); - - CWARN("P3 Header at %p of type %s\n", hdr, type_str); - CWARN(" From %s\n", libcfs_id2str(src)); - CWARN(" To %s\n", libcfs_id2str(dst)); - - switch (hdr->type) { - default: - break; - - case LNET_MSG_PUT: - CWARN(" Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPU64"\n", - hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - CWARN(" Length %d, offset 
%d, hdr data "LPX64"\n", - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.hdr_data); - break; - - case LNET_MSG_GET: - CWARN(" Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPU64"\n", hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CWARN(" Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); - break; - - case LNET_MSG_ACK: - CWARN(" dst md "LPX64"."LPX64", " - "manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); - break; - - case LNET_MSG_REPLY: - CWARN(" dst md "LPX64"."LPX64", " - "length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - hdr->payload_length); - } - -} - - -int -lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, - void *private, int rdma_req) -{ - int rc = 0; - int for_me; - lnet_msg_t *msg; - lnet_nid_t dest_nid; - lnet_nid_t src_nid; - __u32 payload_length; - __u32 type; - - LASSERT (!in_interrupt ()); - - type = le32_to_cpu(hdr->type); - src_nid = le64_to_cpu(hdr->src_nid); - dest_nid = le64_to_cpu(hdr->dest_nid); - payload_length = le32_to_cpu(hdr->payload_length); - - for_me = lnet_ptlcompat_matchnid(ni->ni_nid, dest_nid); - - switch (type) { - case LNET_MSG_ACK: - case LNET_MSG_GET: - if (payload_length > 0) { - CERROR("%s, src %s: bad %s payload %d (0 expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), payload_length); - return -EPROTO; - } - break; - - case LNET_MSG_PUT: - case LNET_MSG_REPLY: - if (payload_length > (for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) { - CERROR("%s, src %s: bad %s payload %d " - "(%d max expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), - payload_length, - for_me ? 
LNET_MAX_PAYLOAD : LNET_MTU); - return -EPROTO; - } - break; - - default: - CERROR("%s, src %s: Bad message type 0x%x\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), type); - return -EPROTO; - } - - /* Regard a bad destination NID as a protocol error. Senders should - * know what they're doing; if they don't they're misconfigured, buggy - * or malicious so we chop them off at the knees :) */ - - if (!for_me) { - if (the_lnet.ln_ptlcompat > 0) { - /* portals compatibility is single-network */ - CERROR ("%s, src %s: Bad dest nid %s " - "(routing not supported)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (the_lnet.ln_ptlcompat == 0 && - LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) { - /* should have gone direct */ - CERROR ("%s, src %s: Bad dest nid %s " - "(should have been sent direct)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (the_lnet.ln_ptlcompat == 0 && - lnet_islocalnid(dest_nid)) { - /* dest is another local NI; sender should have used - * this node's NID on its own network */ - CERROR ("%s, src %s: Bad dest nid %s " - "(it's my nid but on a different network)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (rdma_req && type == LNET_MSG_GET) { - CERROR ("%s, src %s: Bad optimized GET for %s " - "(final destination must be me)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (!the_lnet.ln_routing) { - CERROR ("%s, src %s: Dropping message for %s " - "(routing not enabled)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - goto drop; - } - } - - /* Message looks OK; we're not going to return an error, so we MUST - * call back lnd_recv() come what may... 
*/ - - if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer (src_nid, 0)) /* shall we now? */ - { - CERROR("%s, src %s: Dropping %s to simulate failure\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type)); - goto drop; - } - - msg = lnet_msg_alloc(); - if (msg == NULL) { - CERROR("%s, src %s: Dropping %s (out of memory)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type)); - goto drop; - } - - /* msg zeroed in lnet_msg_alloc; i.e. flags all clear, pointers NULL etc */ - - msg->msg_type = type; - msg->msg_private = private; - msg->msg_receiving = 1; - msg->msg_len = msg->msg_wanted = payload_length; - msg->msg_offset = 0; - msg->msg_hdr = *hdr; - - LNET_LOCK(); - rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid); - if (rc != 0) { - LNET_UNLOCK(); - CERROR("%s, src %s: Dropping %s " - "(error %d looking up sender)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), rc); - goto free_drop; - } - LNET_UNLOCK(); - -#ifndef __KERNEL__ - LASSERT (for_me); -#else - if (!for_me) { - msg->msg_target.pid = le32_to_cpu(hdr->dest_pid); - msg->msg_target.nid = dest_nid; - msg->msg_routing = 1; - msg->msg_offset = 0; - - LNET_LOCK(); - if (msg->msg_rxpeer->lp_rtrcredits <= 0 || - lnet_msg2bufpool(msg)->rbp_credits <= 0) { - rc = lnet_eager_recv_locked(msg); - if (rc != 0) { - LNET_UNLOCK(); - goto free_drop; - } - } - - lnet_commit_routedmsg(msg); - rc = lnet_post_routed_recv_locked(msg, 0); - LNET_UNLOCK(); - - if (rc == 0) - lnet_ni_recv(ni, msg->msg_private, msg, 0, - 0, payload_length, payload_length); - return 0; - } -#endif - /* convert common msg->hdr fields to host byteorder */ - msg->msg_hdr.type = type; - msg->msg_hdr.src_nid = src_nid; - msg->msg_hdr.src_pid = le32_to_cpu(msg->msg_hdr.src_pid); - msg->msg_hdr.dest_nid = dest_nid; - msg->msg_hdr.dest_pid = le32_to_cpu(msg->msg_hdr.dest_pid); - msg->msg_hdr.payload_length = payload_length; - - 
msg->msg_ev.sender = from_nid; - - switch (type) { - case LNET_MSG_ACK: - rc = lnet_parse_ack(ni, msg); - break; - case LNET_MSG_PUT: - rc = lnet_parse_put(ni, msg); - break; - case LNET_MSG_GET: - rc = lnet_parse_get(ni, msg, rdma_req); - break; - case LNET_MSG_REPLY: - rc = lnet_parse_reply(ni, msg); - break; - default: - LASSERT(0); - goto free_drop; /* prevent an unused label if !kernel */ - } - - if (rc == 0) - return 0; - - LASSERT (rc == ENOENT); - - free_drop: - LASSERT (msg->msg_md == NULL); - LNET_LOCK(); - if (msg->msg_rxpeer != NULL) { - lnet_peer_decref_locked(msg->msg_rxpeer); - msg->msg_rxpeer = NULL; - } - lnet_msg_free(msg); /* expects LNET_LOCK held */ - LNET_UNLOCK(); - - drop: - lnet_drop_message(ni, private, payload_length); - return 0; -} - -int -LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, - lnet_process_id_t target, unsigned int portal, - __u64 match_bits, unsigned int offset, - __u64 hdr_data) -{ - lnet_msg_t *msg; - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer (target.nid, 1)) /* shall we now? 
*/ - { - CERROR("Dropping PUT to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = lnet_msg_alloc(); - if (msg == NULL) { - CERROR("Dropping PUT to %s: ENOMEM on lnet_msg_t\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - LNET_LOCK(); - - md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0) { - lnet_msg_free(msg); - LNET_UNLOCK(); - - CERROR("Dropping PUT to %s: MD invalid\n", - libcfs_id2str(target)); - return -ENOENT; - } - - CDEBUG(D_NET, "LNetPut -> %s\n", libcfs_id2str(target)); - - lnet_commit_md(md, msg); - - lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length); - - msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.put.offset = cpu_to_le32(offset); - msg->msg_hdr.msg.put.hdr_data = hdr_data; - - /* NB handles only looked up by creator (no flips) */ - if (ack == LNET_ACK_REQ) { - msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - } else { - msg->msg_hdr.msg.put.ack_wmd = LNET_WIRE_HANDLE_NONE; - } - - msg->msg_ev.type = LNET_EVENT_SEND; - msg->msg_ev.initiator.nid = LNET_NID_ANY; - msg->msg_ev.initiator.pid = the_lnet.ln_pid; - msg->msg_ev.target = target; - msg->msg_ev.sender = LNET_NID_ANY; - msg->msg_ev.pt_index = portal; - msg->msg_ev.match_bits = match_bits; - msg->msg_ev.rlength = md->md_length; - msg->msg_ev.mlength = md->md_length; - msg->msg_ev.offset = offset; - msg->msg_ev.hdr_data = hdr_data; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.send_count++; - the_lnet.ln_counters.send_length += md->md_length; - - LNET_UNLOCK(); - - rc = lnet_send(self, msg); - if (rc != 0) { - CERROR("Error sending PUT to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize (NULL, msg, rc); - } - - /* completion will be signalled by an event */ - 
return 0; -} - -lnet_msg_t * -lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg) -{ - /* The LND can DMA direct to the GET md (i.e. no REPLY msg). This - * returns a msg for the LND to pass to lnet_finalize() when the sink - * data has been received. - * - * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when - * lnet_finalize() is called on it, so the LND must call this first */ - - lnet_msg_t *msg = lnet_msg_alloc(); - lnet_libmd_t *getmd = getmsg->msg_md; - lnet_process_id_t peer_id = getmsg->msg_target; - - LASSERT (!getmsg->msg_target_is_router); - LASSERT (!getmsg->msg_routing); - - LNET_LOCK(); - - LASSERT (getmd->md_refcount > 0); - - if (msg == NULL) { - CERROR ("%s: Dropping REPLY from %s: can't allocate msg\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id)); - goto drop; - } - - if (getmd->md_threshold == 0) { - CERROR ("%s: Dropping REPLY from %s for inactive MD %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), - getmd); - goto drop_msg; - } - - LASSERT (getmd->md_offset == 0); - - CDEBUG(D_NET, "%s: Reply from %s md %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd); - - lnet_commit_md (getmd, msg); - - msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */ - - msg->msg_ev.type = LNET_EVENT_REPLY; - msg->msg_ev.initiator = peer_id; - msg->msg_ev.sender = peer_id.nid; /* optimized GETs can't be routed */ - msg->msg_ev.rlength = msg->msg_ev.mlength = getmd->md_length; - msg->msg_ev.offset = 0; - - lnet_md_deconstruct(getmd, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, getmd); - - the_lnet.ln_counters.recv_count++; - the_lnet.ln_counters.recv_length += getmd->md_length; - - LNET_UNLOCK(); - - return msg; - - drop_msg: - lnet_msg_free(msg); - drop: - the_lnet.ln_counters.drop_count++; - the_lnet.ln_counters.drop_length += getmd->md_length; - - LNET_UNLOCK (); - - return NULL; -} - -void -lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *reply, unsigned int len) -{ - /* 
Set the REPLY length, now the RDMA that elides the REPLY message has - * completed and I know it. */ - LASSERT (reply != NULL); - LASSERT (reply->msg_type == LNET_MSG_GET); - LASSERT (reply->msg_ev.type == LNET_EVENT_REPLY); - - /* NB I trusted my peer to RDMA. If she tells me she's written beyond - * the end of my buffer, I might as well be dead. */ - LASSERT (len <= reply->msg_ev.mlength); - - reply->msg_ev.mlength = len; -} - -int -LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, - lnet_process_id_t target, unsigned int portal, - __u64 match_bits, unsigned int offset) -{ - lnet_msg_t *msg; - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer (target.nid, 1)) /* shall we now? */ - { - CERROR("Dropping GET to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = lnet_msg_alloc(); - if (msg == NULL) { - CERROR("Dropping GET to %s: ENOMEM on lnet_msg_t\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - LNET_LOCK(); - - md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0) { - lnet_msg_free(msg); - LNET_UNLOCK(); - - CERROR("Dropping GET to %s: MD invalid\n", - libcfs_id2str(target)); - return -ENOENT; - } - - CDEBUG(D_NET, "LNetGet -> %s\n", libcfs_id2str(target)); - - lnet_commit_md(md, msg); - - lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0); - - msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset); - msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length); - - /* NB handles only looked up by creator (no flips) */ - msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.get.return_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - - msg->msg_ev.type = LNET_EVENT_SEND; - msg->msg_ev.initiator.nid = LNET_NID_ANY; - 
msg->msg_ev.initiator.pid = the_lnet.ln_pid; - msg->msg_ev.target = target; - msg->msg_ev.sender = LNET_NID_ANY; - msg->msg_ev.pt_index = portal; - msg->msg_ev.match_bits = match_bits; - msg->msg_ev.rlength = md->md_length; - msg->msg_ev.mlength = md->md_length; - msg->msg_ev.offset = offset; - msg->msg_ev.hdr_data = 0; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.send_count++; - - LNET_UNLOCK(); - - rc = lnet_send(self, msg); - if (rc < 0) { - CERROR("error sending GET to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize (NULL, msg, rc); - } - - /* completion will be signalled by an event */ - return 0; -} - -int -LNetDist (lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) -{ - struct list_head *e; - lnet_ni_t *ni; - lnet_route_t *route; - lnet_remotenet_t *rnet; - __u32 dstnet = LNET_NIDNET(dstnid); - int hops; - __u32 order = 2; - - /* if !local_nid_dist_zero, I don't return a distance of 0 ever - * (when lustre sees a distance of 0, it substitutes 0@lo), so I - * keep order 0 free for 0@lo and order 1 free for a local NID - * match */ - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - list_for_each (e, &the_lnet.ln_nis) { - ni = list_entry(e, lnet_ni_t, ni_list); - - if (ni->ni_nid == dstnid || - (the_lnet.ln_ptlcompat > 0 && - LNET_NIDNET(dstnid) == 0 && - LNET_NIDADDR(dstnid) == LNET_NIDADDR(ni->ni_nid) && - LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) != LOLND)) { - if (srcnidp != NULL) - *srcnidp = dstnid; - if (orderp != NULL) { - if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND) - *orderp = 0; - else - *orderp = 1; - } - LNET_UNLOCK(); - - return local_nid_dist_zero ? 
0 : 1; - } - - if (LNET_NIDNET(ni->ni_nid) == dstnet || - (the_lnet.ln_ptlcompat > 0 && - dstnet == 0 && - LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) != LOLND)) { - if (srcnidp != NULL) - *srcnidp = ni->ni_nid; - if (orderp != NULL) - *orderp = order; - LNET_UNLOCK(); - return 1; - } - - order++; - } - - list_for_each (e, &the_lnet.ln_remote_nets) { - rnet = list_entry(e, lnet_remotenet_t, lrn_list); - - if (rnet->lrn_net == dstnet) { - LASSERT (!list_empty(&rnet->lrn_routes)); - route = list_entry(rnet->lrn_routes.next, - lnet_route_t, lr_list); - hops = rnet->lrn_hops; - if (srcnidp != NULL) - *srcnidp = route->lr_gateway->lp_ni->ni_nid; - if (orderp != NULL) - *orderp = order; - LNET_UNLOCK(); - return hops + 1; - } - order++; - } - - LNET_UNLOCK(); - return -EHOSTUNREACH; -} - -int -LNetSetAsync(lnet_process_id_t id, int nasync) -{ -#ifdef __KERNEL__ - return 0; -#else - lnet_ni_t *ni; - lnet_remotenet_t *rnet; - struct list_head *tmp; - lnet_route_t *route; - lnet_nid_t *nids; - int nnids; - int maxnids = 256; - int rc = 0; - int rc2; - - /* Target on a local network? 
*/ - - ni = lnet_net2ni(LNET_NIDNET(id.nid)); - if (ni != NULL) { - if (ni->ni_lnd->lnd_setasync != NULL) - rc = (ni->ni_lnd->lnd_setasync)(ni, id, nasync); - lnet_ni_decref(ni); - return rc; - } - - /* Target on a remote network: apply to routers */ - again: - LIBCFS_ALLOC(nids, maxnids * sizeof(*nids)); - if (nids == NULL) - return -ENOMEM; - nnids = 0; - - /* Snapshot all the router NIDs */ - LNET_LOCK(); - rnet = lnet_find_net_locked(LNET_NIDNET(id.nid)); - if (rnet != NULL) { - list_for_each(tmp, &rnet->lrn_routes) { - if (nnids == maxnids) { - LNET_UNLOCK(); - LIBCFS_FREE(nids, maxnids * sizeof(*nids)); - maxnids *= 2; - goto again; - } - - route = list_entry(tmp, lnet_route_t, lr_list); - nids[nnids++] = route->lr_gateway->lp_nid; - } - } - LNET_UNLOCK(); - - /* set async on all the routers */ - while (nnids-- > 0) { - id.pid = LUSTRE_SRV_LNET_PID; - id.nid = nids[nnids]; - - ni = lnet_net2ni(LNET_NIDNET(id.nid)); - if (ni == NULL) - continue; - - if (ni->ni_lnd->lnd_setasync != NULL) { - rc2 = (ni->ni_lnd->lnd_setasync)(ni, id, nasync); - if (rc2 != 0) - rc = rc2; - } - lnet_ni_decref(ni); - } - - LIBCFS_FREE(nids, maxnids * sizeof(*nids)); - return rc; -#endif -} - diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c deleted file mode 100644 index 84dbbeea2962495713daefc54ba99add378d540e..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-msg.c +++ /dev/null @@ -1,238 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-msg.c - * Message decoding, parsing and finalizing routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -void -lnet_build_unlink_event (lnet_libmd_t *md, lnet_event_t *ev) -{ - memset(ev, 0, sizeof(*ev)); - - ev->status = 0; - ev->unlinked = 1; - ev->type = LNET_EVENT_UNLINK; - lnet_md_deconstruct(md, &ev->md); - lnet_md2handle(&ev->md_handle, md); -} - -void -lnet_enq_event_locked (lnet_eq_t *eq, lnet_event_t *ev) -{ - lnet_event_t *eq_slot; - - /* Allocate the next queue slot */ - ev->sequence = eq->eq_enq_seq++; - - /* size must be a power of 2 to handle sequence # overflow */ - LASSERT (eq->eq_size != 0 && - eq->eq_size == LOWEST_BIT_SET (eq->eq_size)); - eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1)); - - /* There is no race since both event consumers and event producers - * take the LNET_LOCK, so we don't screw around with memory - * barriers, setting the sequence number last or wierd structure - * layout assertions. 
*/ - *eq_slot = *ev; - - /* Call the callback handler (if any) */ - if (eq->eq_callback != NULL) - eq->eq_callback (eq_slot); - -#ifdef __KERNEL__ - /* Wake anyone waiting in LNetEQPoll() */ - if (cfs_waitq_active(&the_lnet.ln_waitq)) - cfs_waitq_broadcast(&the_lnet.ln_waitq); -#else -# ifndef HAVE_LIBPTHREAD - /* LNetEQPoll() calls into _the_ LND to wait for action */ -# else - /* Wake anyone waiting in LNetEQPoll() */ - pthread_cond_broadcast(&the_lnet.ln_cond); -# endif -#endif -} - -void -lnet_complete_msg_locked(lnet_msg_t *msg) -{ - lnet_handle_wire_t ack_wmd; - int rc; - int status = msg->msg_ev.status; - - LASSERT (msg->msg_onactivelist); - - if (status == 0 && msg->msg_ack) { - /* Only send an ACK if the PUT completed successfully */ - - lnet_return_credits_locked(msg); - - msg->msg_ack = 0; - LNET_UNLOCK(); - - LASSERT(msg->msg_ev.type == LNET_EVENT_PUT); - LASSERT(!msg->msg_routing); - - ack_wmd = msg->msg_hdr.msg.put.ack_wmd; - - lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0); - - msg->msg_hdr.msg.ack.dst_wmd = ack_wmd; - msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits; - msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); - - rc = lnet_send(msg->msg_ev.target.nid, msg); - - LNET_LOCK(); - - if (rc == 0) - return; - } else if (status == 0 && /* OK so far */ - (msg->msg_routing && !msg->msg_sending)) { /* not forwarded */ - - LASSERT (!msg->msg_receiving); /* called back recv already */ - - LNET_UNLOCK(); - - rc = lnet_send(LNET_NID_ANY, msg); - - LNET_LOCK(); - - if (rc == 0) - return; - } - - lnet_return_credits_locked(msg); - - LASSERT (msg->msg_onactivelist); - msg->msg_onactivelist = 0; - list_del (&msg->msg_activelist); - the_lnet.ln_counters.msgs_alloc--; - lnet_msg_free(msg); -} - - -void -lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status) -{ -#ifdef __KERNEL__ - int i; - int my_slot; -#endif - lnet_libmd_t *md; - - LASSERT (!in_interrupt ()); - - if (msg == NULL) - return; -#if 0 - CDEBUG(D_WARNING, 
"%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n", - lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target), - msg->msg_target_is_router ? "t" : "", - msg->msg_routing ? "X" : "", - msg->msg_ack ? "A" : "", - msg->msg_sending ? "S" : "", - msg->msg_receiving ? "R" : "", - msg->msg_delayed ? "d" : "", - msg->msg_txcredit ? "C" : "", - msg->msg_peertxcredit ? "c" : "", - msg->msg_rtrcredit ? "F" : "", - msg->msg_peerrtrcredit ? "f" : "", - msg->msg_onactivelist ? "!" : "", - msg->msg_txpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid), - msg->msg_rxpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid)); -#endif - LNET_LOCK(); - - LASSERT (msg->msg_onactivelist); - - msg->msg_ev.status = status; - - md = msg->msg_md; - if (md != NULL) { - int unlink; - - /* Now it's safe to drop my caller's ref */ - md->md_refcount--; - LASSERT (md->md_refcount >= 0); - - unlink = lnet_md_unlinkable(md); - - msg->msg_ev.unlinked = unlink; - - if (md->md_eq != NULL) - lnet_enq_event_locked(md->md_eq, &msg->msg_ev); - - if (unlink) - lnet_md_unlink(md); - - msg->msg_md = NULL; - } - - list_add_tail (&msg->msg_list, &the_lnet.ln_finalizeq); - - /* Recursion breaker. 
Don't complete the message here if I am (or - * enough other threads are) already completing messages */ - -#ifdef __KERNEL__ - my_slot = -1; - for (i = 0; i < the_lnet.ln_nfinalizers; i++) { - if (the_lnet.ln_finalizers[i] == cfs_current()) - goto out; - if (my_slot < 0 && the_lnet.ln_finalizers[i] == NULL) - my_slot = i; - } - if (my_slot < 0) - goto out; - - the_lnet.ln_finalizers[my_slot] = cfs_current(); -#else - if (the_lnet.ln_finalizing) - goto out; - - the_lnet.ln_finalizing = 1; -#endif - - while (!list_empty(&the_lnet.ln_finalizeq)) { - msg = list_entry(the_lnet.ln_finalizeq.next, - lnet_msg_t, msg_list); - - list_del(&msg->msg_list); - - /* NB drops and regains the lnet lock if it actually does - * anything, so my finalizing friends can chomp along too */ - lnet_complete_msg_locked(msg); - } - -#ifdef __KERNEL__ - the_lnet.ln_finalizers[my_slot] = NULL; -#else - the_lnet.ln_finalizing = 0; -#endif - - out: - LNET_UNLOCK(); -} - diff --git a/lnet/lnet/lo.c b/lnet/lnet/lo.c deleted file mode 100644 index e123b3d8b6914eb5366deb6c3ea99c8a2374617c..0000000000000000000000000000000000000000 --- a/lnet/lnet/lo.c +++ /dev/null @@ -1,112 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -int -lolnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - LASSERT (!lntmsg->msg_routing); - LASSERT (!lntmsg->msg_target_is_router); - - return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0); -} - -int -lolnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - lnet_msg_t *sendmsg = private; - - if (lntmsg != NULL) { /* not discarding */ - if (sendmsg->msg_iov != NULL) { - if (iov != NULL) - lnet_copy_iov2iov(niov, iov, offset, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, mlen); - else - lnet_copy_iov2kiov(niov, kiov, offset, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, mlen); - } else { - if (iov != NULL) - lnet_copy_kiov2iov(niov, iov, offset, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, mlen); - else - lnet_copy_kiov2kiov(niov, kiov, offset, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, mlen); - } - - lnet_finalize(ni, lntmsg, 0); - } - - lnet_finalize(ni, sendmsg, 0); - return 0; -} - -static int lolnd_instanced; - -void -lolnd_shutdown(lnet_ni_t *ni) -{ - CDEBUG (D_NET, "shutdown\n"); - LASSERT (lolnd_instanced); - - lolnd_instanced = 0; -} - -int -lolnd_startup (lnet_ni_t *ni) -{ - LASSERT (ni->ni_lnd == &the_lolnd); - LASSERT (!lolnd_instanced); - lolnd_instanced = 1; - - return (0); -} - -lnd_t the_lolnd = { - /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list}, - /* .lnd_refcount = */ 0, - /* .lnd_type = */ LOLND, - /* .lnd_startup = */ lolnd_startup, - /* .lnd_shutdown = */ lolnd_shutdown, - /* .lnt_ctl = */ NULL, - /* .lnd_send = */ lolnd_send, - 
/* .lnd_recv = */ lolnd_recv, - /* .lnd_eager_recv = */ NULL, - /* .lnd_notify = */ NULL, -#ifdef __KERNEL__ - /* .lnd_accept = */ NULL -#else - /* .lnd_wait = */ NULL -#endif -}; - diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c deleted file mode 100644 index d612fafc68d28e9dba9273b0df03562eada297d3..0000000000000000000000000000000000000000 --- a/lnet/lnet/module.c +++ /dev/null @@ -1,184 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -static int config_on_load = 0; -CFS_MODULE_PARM(config_on_load, "i", int, 0444, - "configure network at module load"); - -static struct semaphore lnet_config_mutex; - -int -lnet_configure (void *arg) -{ - /* 'arg' only there so I can be passed to cfs_kernel_thread() */ - int rc = 0; - - LNET_MUTEX_DOWN(&lnet_config_mutex); - - if (!the_lnet.ln_niinit_self) { - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); - if (rc >= 0) { - the_lnet.ln_niinit_self = 1; - rc = 0; - } - } - - LNET_MUTEX_UP(&lnet_config_mutex); - return rc; -} - -int -lnet_unconfigure (void) -{ - int refcount; - - LNET_MUTEX_DOWN(&lnet_config_mutex); - - if (the_lnet.ln_niinit_self) { - the_lnet.ln_niinit_self = 0; - LNetNIFini(); - } - - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - refcount = the_lnet.ln_refcount; - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - - LNET_MUTEX_UP(&lnet_config_mutex); - return (refcount == 0) ? 
0 : -EBUSY; -} - -int -lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data) -{ - int rc; - - switch (cmd) { - case IOC_LIBCFS_CONFIGURE: - return lnet_configure(NULL); - - case IOC_LIBCFS_UNCONFIGURE: - return lnet_unconfigure(); - - default: - /* Passing LNET_PID_ANY only gives me a ref if the net is up - * already; I'll need it to ensure the net can't go down while - * I'm called into it */ - rc = LNetNIInit(LNET_PID_ANY); - if (rc >= 0) { - rc = LNetCtl(cmd, data); - LNetNIFini(); - } - return rc; - } -} - -DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl); - -int -init_lnet(void) -{ - int rc; - ENTRY; - - init_mutex(&lnet_config_mutex); - - rc = LNetInit(); - if (rc != 0) { - CERROR("LNetInit: error %d\n", rc); - RETURN(rc); - } - - rc = libcfs_register_ioctl(&lnet_ioctl_handler); - LASSERT (rc == 0); - - if (config_on_load) { - /* Have to schedule a separate thread to avoid deadlocking - * in modload */ - (void) cfs_kernel_thread(lnet_configure, NULL, 0); - } - - RETURN(0); -} - -void -fini_lnet(void) -{ - int rc; - - rc = libcfs_deregister_ioctl(&lnet_ioctl_handler); - LASSERT (rc == 0); - - LNetFini(); -} - -EXPORT_SYMBOL(lnet_register_lnd); -EXPORT_SYMBOL(lnet_unregister_lnd); - -EXPORT_SYMBOL(LNetMEAttach); -EXPORT_SYMBOL(LNetMEInsert); -EXPORT_SYMBOL(LNetMEUnlink); -EXPORT_SYMBOL(LNetEQAlloc); -EXPORT_SYMBOL(LNetMDAttach); -EXPORT_SYMBOL(LNetMDUnlink); -EXPORT_SYMBOL(LNetNIInit); -EXPORT_SYMBOL(LNetNIFini); -EXPORT_SYMBOL(LNetInit); -EXPORT_SYMBOL(LNetFini); -EXPORT_SYMBOL(LNetSnprintHandle); -EXPORT_SYMBOL(LNetPut); -EXPORT_SYMBOL(LNetGet); -EXPORT_SYMBOL(LNetEQWait); -EXPORT_SYMBOL(LNetEQFree); -EXPORT_SYMBOL(LNetEQGet); -EXPORT_SYMBOL(LNetGetId); -EXPORT_SYMBOL(LNetMDBind); -EXPORT_SYMBOL(LNetDist); -EXPORT_SYMBOL(LNetSetAsync); -EXPORT_SYMBOL(LNetCtl); -EXPORT_SYMBOL(LNetSetLazyPortal); -EXPORT_SYMBOL(LNetClearLazyPortal); -EXPORT_SYMBOL(the_lnet); -EXPORT_SYMBOL(lnet_iov_nob); -EXPORT_SYMBOL(lnet_extract_iov); 
-EXPORT_SYMBOL(lnet_kiov_nob); -EXPORT_SYMBOL(lnet_extract_kiov); -EXPORT_SYMBOL(lnet_copy_iov2iov); -EXPORT_SYMBOL(lnet_copy_iov2kiov); -EXPORT_SYMBOL(lnet_copy_kiov2iov); -EXPORT_SYMBOL(lnet_copy_kiov2kiov); -EXPORT_SYMBOL(lnet_finalize); -EXPORT_SYMBOL(lnet_parse); -EXPORT_SYMBOL(lnet_create_reply_msg); -EXPORT_SYMBOL(lnet_set_reply_msg_len); -EXPORT_SYMBOL(lnet_msgtyp2str); -EXPORT_SYMBOL(lnet_net2ni_locked); - -MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>"); -MODULE_DESCRIPTION("Portals v3.1"); -MODULE_LICENSE("GPL"); - -cfs_module(lnet, "1.0.0", init_lnet, fini_lnet); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c deleted file mode 100644 index d4b58fa8461bf959d8cb0f38df93db081beac228..0000000000000000000000000000000000000000 --- a/lnet/lnet/peer.c +++ /dev/null @@ -1,250 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-move.c - * Data movement routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -int -lnet_create_peer_table(void) -{ - struct list_head *hash; - int i; - - LASSERT (the_lnet.ln_peer_hash == NULL); - LIBCFS_ALLOC(hash, LNET_PEER_HASHSIZE * sizeof(struct list_head)); - - if (hash == NULL) { - CERROR("Can't allocate peer hash table\n"); - return -ENOMEM; - } - - for (i = 0; i < LNET_PEER_HASHSIZE; i++) - CFS_INIT_LIST_HEAD(&hash[i]); - - the_lnet.ln_peer_hash = hash; - return 0; -} - -void -lnet_destroy_peer_table(void) -{ - int i; - - if (the_lnet.ln_peer_hash == NULL) - return; - - for (i = 0; i < LNET_PEER_HASHSIZE; i++) - LASSERT (list_empty(&the_lnet.ln_peer_hash[i])); - - LIBCFS_FREE(the_lnet.ln_peer_hash, - LNET_PEER_HASHSIZE * sizeof (struct list_head)); - the_lnet.ln_peer_hash = NULL; -} - -void -lnet_clear_peer_table(void) -{ - int i; - - LASSERT (the_lnet.ln_shutdown); /* i.e. no new peers */ - - for (i = 0; i < LNET_PEER_HASHSIZE; i++) { - struct list_head *peers = &the_lnet.ln_peer_hash[i]; - - LNET_LOCK(); - while (!list_empty(peers)) { - lnet_peer_t *lp = list_entry(peers->next, - lnet_peer_t, lp_hashlist); - - list_del(&lp->lp_hashlist); - lnet_peer_decref_locked(lp); /* lose hash table's ref */ - } - LNET_UNLOCK(); - } - - LNET_LOCK(); - for (i = 3; the_lnet.ln_npeers != 0;i++) { - LNET_UNLOCK(); - - if ((i & (i-1)) == 0) - CDEBUG(D_WARNING,"Waiting for %d peers\n", - the_lnet.ln_npeers); - cfs_pause(cfs_time_seconds(1)); - - LNET_LOCK(); - } - LNET_UNLOCK(); -} - -void -lnet_destroy_peer_locked (lnet_peer_t *lp) -{ - lnet_ni_decref_locked(lp->lp_ni); - LNET_UNLOCK(); - - LASSERT (lp->lp_refcount == 0); - LASSERT (lp->lp_rtr_refcount == 0); - LASSERT (list_empty(&lp->lp_txq)); - LASSERT (lp->lp_txqnob == 0); - - LIBCFS_FREE(lp, sizeof(*lp)); - - LNET_LOCK(); - - LASSERT(the_lnet.ln_npeers > 0); - the_lnet.ln_npeers--; -} - -lnet_peer_t * -lnet_find_peer_locked (lnet_nid_t nid) -{ - unsigned int idx = LNET_NIDADDR(nid) % LNET_PEER_HASHSIZE; - struct list_head 
*peers = &the_lnet.ln_peer_hash[idx]; - struct list_head *tmp; - lnet_peer_t *lp; - - if (the_lnet.ln_shutdown) - return NULL; - - list_for_each (tmp, peers) { - lp = list_entry(tmp, lnet_peer_t, lp_hashlist); - - if (lp->lp_nid == nid) { - lnet_peer_addref_locked(lp); - return lp; - } - } - - return NULL; -} - -int -lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid) -{ - lnet_peer_t *lp; - lnet_peer_t *lp2; - - lp = lnet_find_peer_locked(nid); - if (lp != NULL) { - *lpp = lp; - return 0; - } - - LNET_UNLOCK(); - - LIBCFS_ALLOC(lp, sizeof(*lp)); - if (lp == NULL) { - *lpp = NULL; - LNET_LOCK(); - return -ENOMEM; - } - - memset(lp, 0, sizeof(*lp)); /* zero counters etc */ - - CFS_INIT_LIST_HEAD(&lp->lp_txq); - CFS_INIT_LIST_HEAD(&lp->lp_rtrq); - - lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */ - lp->lp_notify = 0; - lp->lp_notifylnd = 0; - lp->lp_notifying = 0; - lp->lp_alive_count = 0; - lp->lp_timestamp = 0; - lp->lp_ping_timestamp = 0; - lp->lp_nid = nid; - lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ - lp->lp_rtr_refcount = 0; - - LNET_LOCK(); - - lp2 = lnet_find_peer_locked(nid); - if (lp2 != NULL) { - LNET_UNLOCK(); - LIBCFS_FREE(lp, sizeof(*lp)); - LNET_LOCK(); - - if (the_lnet.ln_shutdown) { - lnet_peer_decref_locked(lp2); - *lpp = NULL; - return -ESHUTDOWN; - } - - *lpp = lp2; - return 0; - } - - lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid)); - if (lp->lp_ni == NULL) { - LNET_UNLOCK(); - LIBCFS_FREE(lp, sizeof(*lp)); - LNET_LOCK(); - - *lpp = NULL; - return the_lnet.ln_shutdown ? 
-ESHUTDOWN : -EHOSTUNREACH; - } - - lp->lp_txcredits = - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; - - /* As a first approximation; allow this peer the same number of router - * buffers as it is allowed outstanding sends */ - lp->lp_rtrcredits = lp->lp_minrtrcredits = lp->lp_txcredits; - - LASSERT (!the_lnet.ln_shutdown); - /* can't add peers after shutdown starts */ - - list_add_tail(&lp->lp_hashlist, lnet_nid2peerhash(nid)); - the_lnet.ln_npeers++; - the_lnet.ln_peertable_version++; - *lpp = lp; - return 0; -} - -void -lnet_debug_peer(lnet_nid_t nid) -{ - int rc; - lnet_peer_t *lp; - - LNET_LOCK(); - - rc = lnet_nid2peer_locked(&lp, nid); - if (rc != 0) { - LNET_UNLOCK(); - CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid)); - return; - } - - CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", - libcfs_nid2str(lp->lp_nid), lp->lp_refcount, - !lnet_isrouter(lp) ? "~rtr" : (lp->lp_alive ? "up" : "down"), - lp->lp_ni->ni_peertxcredits, - lp->lp_rtrcredits, lp->lp_minrtrcredits, - lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob); - - lnet_peer_decref_locked(lp); - - LNET_UNLOCK(); -} diff --git a/lnet/lnet/portals.xcode/project.pbxproj b/lnet/lnet/portals.xcode/project.pbxproj deleted file mode 100644 index 1dc0146c43a91ce8a117c6790b6fbf3d44da0a60..0000000000000000000000000000000000000000 --- a/lnet/lnet/portals.xcode/project.pbxproj +++ /dev/null @@ -1,430 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = portals; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 19A778270730EACD00846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = module.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778280730EACD00846375 = { - fileRef = 19A778270730EACD00846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7782B0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-errno.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782C0730EB8400846375 = { - fileEncoding = 30; - isa = 
PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-ni.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782D0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-wrap.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782E0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-eq.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782F0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-init.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778300730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-md.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778310730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-me.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778320730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-move.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778330730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-msg.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778340730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-ni.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778350730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-pid.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778360730EB8400846375 = { - fileRef = 19A7782B0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778370730EB8400846375 = { - fileRef = 19A7782C0730EB8400846375; - isa = 
PBXBuildFile; - settings = { - }; - }; - 19A778380730EB8400846375 = { - fileRef = 19A7782D0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778390730EB8400846375 = { - fileRef = 19A7782E0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783A0730EB8400846375 = { - fileRef = 19A7782F0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783B0730EB8400846375 = { - fileRef = 19A778300730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783C0730EB8400846375 = { - fileRef = 19A778310730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783D0730EB8400846375 = { - fileRef = 19A778320730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783E0730EB8400846375 = { - fileRef = 19A778330730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783F0730EB8400846375 = { - fileRef = 19A778340730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778400730EB8400846375 = { - fileRef = 19A778350730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 19A7782B0730EB8400846375, - 19A7782C0730EB8400846375, - 19A7782D0730EB8400846375, - 19A7782E0730EB8400846375, - 19A7782F0730EB8400846375, - 19A778300730EB8400846375, - 19A778310730EB8400846375, - 19A778320730EB8400846375, - 19A778330730EB8400846375, - 19A778340730EB8400846375, - 19A778350730EB8400846375, - 19A778270730EACD00846375, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, 
- 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = ../include; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.portals.portals; - MODULE_START = portals_start; - MODULE_STOP = portals_stop; - MODULE_VERSION = 1.0.1; - OTHER_CFLAGS = "-D__KERNEL__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = portals; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = portals; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = portals; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 19A778280730EACD00846375, - 19A778360730EB8400846375, - 19A778370730EB8400846375, - 19A778380730EB8400846375, - 19A778390730EB8400846375, - 19A7783A0730EB8400846375, - 19A7783B0730EB8400846375, - 19A7783C0730EB8400846375, - 19A7783D0730EB8400846375, - 19A7783E0730EB8400846375, - 19A7783F0730EB8400846375, - 19A778400730EB8400846375, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = portals.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c deleted file mode 100644 index 881f4adc06463c6147802c590e021184f8cf0c5a..0000000000000000000000000000000000000000 --- a/lnet/lnet/router.c +++ /dev/null @@ -1,1075 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -#if defined(__KERNEL__) && defined(LNET_ROUTER) - -static char *forwarding = ""; -CFS_MODULE_PARM(forwarding, "s", charp, 0444, - "Explicitly enable/disable forwarding between networks"); - -static int tiny_router_buffers = 1024; -CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444, - "# of 0 payload messages to buffer in the router"); -static int small_router_buffers = 8192; -CFS_MODULE_PARM(small_router_buffers, "i", int, 0444, - "# of small (1 page) messages to buffer in the router"); -static int large_router_buffers = 512; -CFS_MODULE_PARM(large_router_buffers, "i", int, 0444, - "# of large messages to buffer in the router"); - -static int auto_down = 1; -CFS_MODULE_PARM(auto_down, "i", int, 0444, - "Automatically mark peers down on comms error"); - -static int check_routers_before_use = 0; -CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444, - "Assume routers are down and ping them before use"); - -static int dead_router_check_interval = 0; -CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0444, - "Seconds between dead router health checks (<= 0 to disable)"); - -static int live_router_check_interval = 0; -CFS_MODULE_PARM(live_router_check_interval, "i", int, 0444, - "Seconds between live router health checks (<= 0 to disable)"); - -static int router_ping_timeout = 50; -CFS_MODULE_PARM(router_ping_timeout, "i", int, 0444, - "Seconds to wait for the reply to a router health query"); - -int -lnet_peers_start_down(void) -{ - return check_routers_before_use; -} - -void -lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, time_t when) -{ - if (when < lp->lp_timestamp) { /* out of date information */ - CDEBUG(D_NET, "Out of date\n"); - return; - } - - lp->lp_timestamp = when; /* update timestamp */ - lp->lp_ping_deadline = 0; /* disable ping timeout */ - - if (lp->lp_alive_count != 0 && /* got old news */ - (!lp->lp_alive) == (!alive)) { /* new date for old news */ - 
CDEBUG(D_NET, "Old news\n"); - return; - } - - /* Flag that notification is outstanding */ - - lp->lp_alive_count++; - lp->lp_alive = !(!alive); /* 1 bit! */ - lp->lp_notify = 1; - lp->lp_notifylnd |= notifylnd; - - CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive); -} - -void -lnet_do_notify (lnet_peer_t *lp) -{ - lnet_ni_t *ni = lp->lp_ni; - int alive; - int notifylnd; - - LNET_LOCK(); - - /* Notify only in 1 thread at any time to ensure ordered notification. - * NB individual events can be missed; the only guarantee is that you - * always get the most recent news */ - - if (lp->lp_notifying) { - LNET_UNLOCK(); - return; - } - - lp->lp_notifying = 1; - - while (lp->lp_notify) { - alive = lp->lp_alive; - notifylnd = lp->lp_notifylnd; - - lp->lp_notifylnd = 0; - lp->lp_notify = 0; - - if (notifylnd && ni->ni_lnd->lnd_notify != NULL) { - LNET_UNLOCK(); - - /* A new notification could happen now; I'll handle it - * when control returns to me */ - - (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive); - - LNET_LOCK(); - } - } - - lp->lp_notifying = 0; - - LNET_UNLOCK(); -} - -int -lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, time_t when) -{ - lnet_peer_t *lp = NULL; - time_t now = cfs_time_current_sec(); - - LASSERT (!in_interrupt ()); - - CDEBUG (D_NET, "%s notifying %s: %s\n", - (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), - alive ? "up" : "down"); - - if (ni != NULL && - LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) { - CWARN ("Ignoring notification of %s %s by %s (different net)\n", - libcfs_nid2str(nid), alive ? "birth" : "death", - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - } - - /* can't do predictions... */ - if (when > now) { - CWARN ("Ignoring prediction from %s of %s %s " - "%ld seconds in the future\n", - (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), alive ? 
"up" : "down", - when - now); - return -EINVAL; - } - - if (ni != NULL && !alive && /* LND telling me she's down */ - !auto_down) { /* auto-down disabled */ - CDEBUG(D_NET, "Auto-down disabled\n"); - return 0; - } - - LNET_LOCK(); - - lp = lnet_find_peer_locked(nid); - if (lp == NULL) { - /* nid not found */ - LNET_UNLOCK(); - CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid)); - return 0; - } - - lnet_notify_locked(lp, ni == NULL, alive, when); - - LNET_UNLOCK(); - - lnet_do_notify(lp); - - LNET_LOCK(); - - lnet_peer_decref_locked(lp); - - LNET_UNLOCK(); - return 0; -} -EXPORT_SYMBOL(lnet_notify); - -#else - -int -lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, time_t when) -{ - return -EOPNOTSUPP; -} - -#endif - -static void -lnet_rtr_addref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - LASSERT (lp->lp_rtr_refcount >= 0); - - lp->lp_rtr_refcount++; - if (lp->lp_rtr_refcount == 1) { - struct list_head *pos; - - /* a simple insertion sort */ - list_for_each_prev(pos, &the_lnet.ln_routers) { - lnet_peer_t *rtr = list_entry(pos, lnet_peer_t, - lp_rtr_list); - - if (rtr->lp_nid < lp->lp_nid) - break; - } - - list_add(&lp->lp_rtr_list, pos); - /* addref for the_lnet.ln_routers */ - lnet_peer_addref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -static void -lnet_rtr_decref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - LASSERT (lp->lp_rtr_refcount > 0); - - lp->lp_rtr_refcount--; - if (lp->lp_rtr_refcount == 0) { - list_del(&lp->lp_rtr_list); - /* decref for the_lnet.ln_routers */ - lnet_peer_decref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -lnet_remotenet_t * -lnet_find_net_locked (__u32 net) -{ - lnet_remotenet_t *rnet; - struct list_head *tmp; - - LASSERT (!the_lnet.ln_shutdown); - - list_for_each (tmp, &the_lnet.ln_remote_nets) { - rnet = list_entry(tmp, lnet_remotenet_t, lrn_list); - - if (rnet->lrn_net == net) - return rnet; - } - return NULL; -} - -int -lnet_add_route (__u32 net, unsigned int hops, 
lnet_nid_t gateway) -{ - struct list_head zombies; - struct list_head *e; - lnet_remotenet_t *rnet; - lnet_remotenet_t *rnet2; - lnet_route_t *route; - lnet_route_t *route2; - lnet_ni_t *ni; - int add_route; - int rc; - - CDEBUG(D_NET, "Add route: net %s hops %u gw %s\n", - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - - if (gateway == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND || - net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND || - LNET_NIDNET(gateway) == net || - hops < 1 || hops > 255) - return (-EINVAL); - - if (lnet_islocalnet(net)) /* it's a local network */ - return 0; /* ignore the route entry */ - - /* Assume net, route, all new */ - LIBCFS_ALLOC(route, sizeof(*route)); - LIBCFS_ALLOC(rnet, sizeof(*rnet)); - if (route == NULL || rnet == NULL) { - CERROR("Out of memory creating route %s %d %s\n", - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - if (route != NULL) - LIBCFS_FREE(route, sizeof(*route)); - if (rnet != NULL) - LIBCFS_FREE(rnet, sizeof(*rnet)); - return -ENOMEM; - } - - CFS_INIT_LIST_HEAD(&rnet->lrn_routes); - rnet->lrn_net = net; - rnet->lrn_hops = hops; - - LNET_LOCK(); - - rc = lnet_nid2peer_locked(&route->lr_gateway, gateway); - if (rc != 0) { - LNET_UNLOCK(); - - LIBCFS_FREE(route, sizeof(*route)); - LIBCFS_FREE(rnet, sizeof(*rnet)); - - if (rc == -EHOSTUNREACH) /* gateway is not on a local net */ - return 0; /* ignore the route entry */ - - CERROR("Error %d creating route %s %d %s\n", rc, - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - return rc; - } - - LASSERT (!the_lnet.ln_shutdown); - CFS_INIT_LIST_HEAD(&zombies); - - rnet2 = lnet_find_net_locked(net); - if (rnet2 == NULL) { - /* new network */ - list_add_tail(&rnet->lrn_list, &the_lnet.ln_remote_nets); - rnet2 = rnet; - } - - if (hops > rnet2->lrn_hops) { - /* New route is longer; ignore it */ - add_route = 0; - } else if (hops < rnet2->lrn_hops) { - /* new route supercedes all currently known routes to this - * net */ - 
list_add(&zombies, &rnet2->lrn_routes); - list_del_init(&rnet2->lrn_routes); - add_route = 1; - } else { - add_route = 1; - /* New route has the same hopcount as existing routes; search - * for a duplicate route (it's a NOOP if it is) */ - list_for_each (e, &rnet2->lrn_routes) { - route2 = list_entry(e, lnet_route_t, lr_list); - - if (route2->lr_gateway == route->lr_gateway) { - add_route = 0; - break; - } - - /* our loopups must be true */ - LASSERT (route2->lr_gateway->lp_nid != gateway); - } - } - - if (add_route) { - ni = route->lr_gateway->lp_ni; - lnet_ni_addref_locked(ni); - - LASSERT (rc == 0); - list_add_tail(&route->lr_list, &rnet2->lrn_routes); - the_lnet.ln_remote_nets_version++; - - lnet_rtr_addref_locked(route->lr_gateway); - - LNET_UNLOCK(); - - /* XXX Assume alive */ - if (ni->ni_lnd->lnd_notify != NULL) - (ni->ni_lnd->lnd_notify)(ni, gateway, 1); - - lnet_ni_decref(ni); - } else { - lnet_peer_decref_locked(route->lr_gateway); - LNET_UNLOCK(); - LIBCFS_FREE(route, sizeof(*route)); - } - - if (rnet != rnet2) - LIBCFS_FREE(rnet, sizeof(*rnet)); - - while (!list_empty(&zombies)) { - route = list_entry(zombies.next, lnet_route_t, lr_list); - list_del(&route->lr_list); - - LNET_LOCK(); - lnet_rtr_decref_locked(route->lr_gateway); - lnet_peer_decref_locked(route->lr_gateway); - LNET_UNLOCK(); - LIBCFS_FREE(route, sizeof(*route)); - } - - return rc; -} - -int -lnet_check_routes (void) -{ - lnet_remotenet_t *rnet; - lnet_route_t *route; - lnet_route_t *route2; - struct list_head *e1; - struct list_head *e2; - - LNET_LOCK(); - - list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = list_entry(e1, lnet_remotenet_t, lrn_list); - - route2 = NULL; - list_for_each (e2, &rnet->lrn_routes) { - route = list_entry(e2, lnet_route_t, lr_list); - - if (route2 == NULL) - route2 = route; - else if (route->lr_gateway->lp_ni != - route2->lr_gateway->lp_ni) { - LNET_UNLOCK(); - - CERROR("Routes to %s via %s and %s not supported\n", - libcfs_net2str(rnet->lrn_net), - 
libcfs_nid2str(route->lr_gateway->lp_nid), - libcfs_nid2str(route2->lr_gateway->lp_nid)); - return -EINVAL; - } - } - } - - LNET_UNLOCK(); - return 0; -} - -int -lnet_del_route (__u32 net, lnet_nid_t gw_nid) -{ - lnet_remotenet_t *rnet; - lnet_route_t *route; - struct list_head *e1; - struct list_head *e2; - int rc = -ENOENT; - - CDEBUG(D_NET, "Del route: net %s : gw %s\n", - libcfs_net2str(net), libcfs_nid2str(gw_nid)); - - /* NB Caller may specify either all routes via the given gateway - * or a specific route entry actual NIDs) */ - - again: - LNET_LOCK(); - - list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = list_entry(e1, lnet_remotenet_t, lrn_list); - - if (!(net == LNET_NIDNET(LNET_NID_ANY) || - net == rnet->lrn_net)) - continue; - - list_for_each (e2, &rnet->lrn_routes) { - route = list_entry(e2, lnet_route_t, lr_list); - - if (!(gw_nid == LNET_NID_ANY || - gw_nid == route->lr_gateway->lp_nid)) - continue; - - list_del(&route->lr_list); - the_lnet.ln_remote_nets_version++; - - if (list_empty(&rnet->lrn_routes)) - list_del(&rnet->lrn_list); - else - rnet = NULL; - - lnet_rtr_decref_locked(route->lr_gateway); - lnet_peer_decref_locked(route->lr_gateway); - LNET_UNLOCK(); - - LIBCFS_FREE(route, sizeof (*route)); - - if (rnet != NULL) - LIBCFS_FREE(rnet, sizeof(*rnet)); - - rc = 0; - goto again; - } - } - - LNET_UNLOCK(); - return rc; -} - -void -lnet_destroy_routes (void) -{ - lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY); -} - -int -lnet_get_route (int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive) -{ - struct list_head *e1; - struct list_head *e2; - lnet_remotenet_t *rnet; - lnet_route_t *route; - - LNET_LOCK(); - - list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = list_entry(e1, lnet_remotenet_t, lrn_list); - - list_for_each (e2, &rnet->lrn_routes) { - route = list_entry(e2, lnet_route_t, lr_list); - - if (idx-- == 0) { - *net = rnet->lrn_net; - *hops = rnet->lrn_hops; - *gateway = route->lr_gateway->lp_nid; - 
*alive = route->lr_gateway->lp_alive; - LNET_UNLOCK(); - return 0; - } - } - } - - LNET_UNLOCK(); - return -ENOENT; -} - -#if defined(__KERNEL__) && defined(LNET_ROUTER) -static void -lnet_router_checker_event (lnet_event_t *event) -{ - /* CAVEAT EMPTOR: I'm called with LNET_LOCKed and I'm not allowed to - * drop it (that's how come I see _every_ event, even ones that would - * overflow my EQ) */ - lnet_peer_t *lp; - lnet_nid_t nid; - - if (event->unlinked) { - /* The router checker thread has unlinked the rc_md - * and exited. */ - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKING); - the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKED; - mutex_up(&the_lnet.ln_rc_signal); - return; - } - - LASSERT (event->type == LNET_EVENT_SEND || - event->type == LNET_EVENT_REPLY); - - nid = (event->type == LNET_EVENT_SEND) ? - event->target.nid : event->initiator.nid; - - lp = lnet_find_peer_locked(nid); - if (lp == NULL) { - /* router may have been removed */ - CDEBUG(D_NET, "Router %s not found\n", libcfs_nid2str(nid)); - return; - } - - if (event->type == LNET_EVENT_SEND) /* re-enable another ping */ - lp->lp_ping_notsent = 0; - - if (lnet_isrouter(lp) && /* ignore if no longer a router */ - (event->status != 0 || - event->type == LNET_EVENT_REPLY)) { - - /* A successful REPLY means the router is up. If _any_ comms - * to the router fail I assume it's down (this will happen if - * we ping alive routers to try to detect router death before - * apps get burned). */ - - lnet_notify_locked(lp, 1, (event->status == 0), - cfs_time_current_sec()); - - /* The router checker will wake up very shortly and do the - * actual notification. - * XXX If 'lp' stops being a router before then, it will still - * have the notification pending!!! 
*/ - } - - /* This decref will NOT drop LNET_LOCK (it had to have 1 ref when it - * was in the peer table and I've not dropped the lock, so no-one else - * can have reduced the refcount) */ - LASSERT(lp->lp_refcount > 1); - - lnet_peer_decref_locked(lp); -} - -static int -lnet_router_checker(void *arg) -{ - static lnet_ping_info_t pinginfo; - - int rc; - lnet_handle_md_t mdh; - lnet_peer_t *rtr; - struct list_head *entry; - time_t now; - lnet_process_id_t rtr_id; - int secs; - - cfs_daemonize("router_checker"); - cfs_block_allsigs(); - - rtr_id.pid = LUSTRE_SRV_LNET_PID; - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - rc = LNetMDBind((lnet_md_t){.start = &pinginfo, - .length = sizeof(pinginfo), - .threshold = LNET_MD_THRESH_INF, - .options = LNET_MD_TRUNCATE, - .eq_handle = the_lnet.ln_rc_eqh}, - LNET_UNLINK, - &mdh); - - if (rc < 0) { - CERROR("Can't bind MD: %d\n", rc); - the_lnet.ln_rc_state = rc; - mutex_up(&the_lnet.ln_rc_signal); - return rc; - } - - LASSERT (rc == 0); - - the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING; - mutex_up(&the_lnet.ln_rc_signal); /* let my parent go */ - - while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { - __u64 version; - - LNET_LOCK(); -rescan: - version = the_lnet.ln_routers_version; - - list_for_each (entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, lnet_peer_t, lp_rtr_list); - - lnet_peer_addref_locked(rtr); - - now = cfs_time_current_sec(); - - if (rtr->lp_ping_deadline != 0 && /* ping timed out? 
*/ - now > rtr->lp_ping_deadline) - lnet_notify_locked(rtr, 1, 0, now); - - LNET_UNLOCK(); - - /* Run any outstanding notificiations */ - lnet_do_notify(rtr); - - if (rtr->lp_alive) { - secs = live_router_check_interval; - } else { - secs = dead_router_check_interval; - } - if (secs <= 0) - secs = 0; - - if (secs != 0 && - !rtr->lp_ping_notsent && - now > rtr->lp_ping_timestamp + secs) { - CDEBUG(D_NET, "Check: %s\n", - libcfs_nid2str(rtr->lp_nid)); - - LNET_LOCK(); - rtr_id.nid = rtr->lp_nid; - rtr->lp_ping_notsent = 1; - rtr->lp_ping_timestamp = now; - - if (rtr->lp_ping_deadline == 0) - rtr->lp_ping_deadline = - now + router_ping_timeout; - - LNET_UNLOCK(); - - LNetGet(LNET_NID_ANY, mdh, rtr_id, - LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - } - - LNET_LOCK(); - lnet_peer_decref_locked(rtr); - - if (version != the_lnet.ln_routers_version) { - /* the routers list has changed */ - goto rescan; - } - } - - LNET_UNLOCK(); - - /* Call cfs_pause() here always adds 1 to load average - * because kernel counts # active tasks as nr_running - * + nr_uninterruptible. 
*/ - set_current_state(CFS_TASK_INTERRUPTIBLE); - cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(1)); - } - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_STOPTHREAD); - the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKING; - - rc = LNetMDUnlink(mdh); - LASSERT (rc == 0); - - /* The unlink event callback will signal final completion */ - return 0; -} - - -void -lnet_wait_known_routerstate(void) -{ - lnet_peer_t *rtr; - struct list_head *entry; - int all_known; - - for (;;) { - LNET_LOCK(); - - all_known = 1; - list_for_each (entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, lnet_peer_t, lp_rtr_list); - - if (rtr->lp_alive_count == 0) { - all_known = 0; - break; - } - } - - LNET_UNLOCK(); - - if (all_known) - return; - - cfs_pause(cfs_time_seconds(1)); - } -} - -void -lnet_router_checker_stop(void) -{ - int rc; - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING || - the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN) - return; - - the_lnet.ln_rc_state = LNET_RC_STATE_STOPTHREAD; - /* block until event callback signals exit */ - mutex_down(&the_lnet.ln_rc_signal); - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKED); - - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT (rc == 0); - - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; -} - -int -lnet_router_checker_start(void) -{ - int rc; - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - if (check_routers_before_use && - dead_router_check_interval <= 0) { - LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be" - " set if 'check_routers_before_use' is set" - "\n"); - return -EINVAL; - } - - if (live_router_check_interval <= 0 && - dead_router_check_interval <= 0) - return 0; - - init_mutex_locked(&the_lnet.ln_rc_signal); - - /* EQ size doesn't matter; the callback is guaranteed to get every - * event */ - rc = LNetEQAlloc(1, lnet_router_checker_event, - &the_lnet.ln_rc_eqh); - if (rc != 0) { - CERROR("Can't 
allocate EQ: %d\n", rc); - return -ENOMEM; - } - - rc = (int)cfs_kernel_thread(lnet_router_checker, NULL, 0); - if (rc < 0) { - CERROR("Can't start router checker thread: %d\n", rc); - goto failed; - } - - mutex_down(&the_lnet.ln_rc_signal); /* wait for checker to startup */ - - rc = the_lnet.ln_rc_state; - if (rc < 0) { - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - goto failed; - } - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - - if (check_routers_before_use) { - /* Note that a helpful side-effect of pinging all known routers - * at startup is that it makes them drop stale connections they - * may have to a previous instance of me. */ - lnet_wait_known_routerstate(); - } - - return 0; - - failed: - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT (rc == 0); - return rc; -} - -void -lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages) -{ - int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]); - - while (--npages >= 0) - cfs_free_page(rb->rb_kiov[npages].kiov_page); - - LIBCFS_FREE(rb, sz); -} - -lnet_rtrbuf_t * -lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp) -{ - int npages = rbp->rbp_npages; - int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]); - struct page *page; - lnet_rtrbuf_t *rb; - int i; - - LIBCFS_ALLOC(rb, sz); - if (rb == NULL) - return NULL; - - rb->rb_pool = rbp; - - for (i = 0; i < npages; i++) { - page = cfs_alloc_page(CFS_ALLOC_ZERO | CFS_ALLOC_STD); - if (page == NULL) { - while (--i >= 0) - cfs_free_page(rb->rb_kiov[i].kiov_page); - - LIBCFS_FREE(rb, sz); - return NULL; - } - - rb->rb_kiov[i].kiov_len = CFS_PAGE_SIZE; - rb->rb_kiov[i].kiov_offset = 0; - rb->rb_kiov[i].kiov_page = page; - } - - return rb; -} - -void -lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp) -{ - int npages = rbp->rbp_npages; - int nbuffers = 0; - lnet_rtrbuf_t *rb; - - LASSERT (list_empty(&rbp->rbp_msgs)); - LASSERT (rbp->rbp_credits == rbp->rbp_nbuffers); - - while (!list_empty(&rbp->rbp_bufs)) { - LASSERT (rbp->rbp_credits > 0); - - rb = list_entry(rbp->rbp_bufs.next, 
- lnet_rtrbuf_t, rb_list); - list_del(&rb->rb_list); - lnet_destroy_rtrbuf(rb, npages); - nbuffers++; - } - - LASSERT (rbp->rbp_nbuffers == nbuffers); - LASSERT (rbp->rbp_credits == nbuffers); - - rbp->rbp_nbuffers = rbp->rbp_credits = 0; -} - -int -lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs) -{ - lnet_rtrbuf_t *rb; - int i; - - if (rbp->rbp_nbuffers != 0) { - LASSERT (rbp->rbp_nbuffers == nbufs); - return 0; - } - - for (i = 0; i < nbufs; i++) { - rb = lnet_new_rtrbuf(rbp); - - if (rb == NULL) { - CERROR("Failed to allocate %d router bufs of %d pages\n", - nbufs, rbp->rbp_npages); - return -ENOMEM; - } - - rbp->rbp_nbuffers++; - rbp->rbp_credits++; - rbp->rbp_mincredits++; - list_add(&rb->rb_list, &rbp->rbp_bufs); - - /* No allocation "under fire" */ - /* Otherwise we'd need code to schedule blocked msgs etc */ - LASSERT (!the_lnet.ln_routing); - } - - LASSERT (rbp->rbp_credits == nbufs); - return 0; -} - -void -lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages) -{ - CFS_INIT_LIST_HEAD(&rbp->rbp_msgs); - CFS_INIT_LIST_HEAD(&rbp->rbp_bufs); - - rbp->rbp_npages = npages; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; -} - -void -lnet_free_rtrpools(void) -{ - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[0]); - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[1]); - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[2]); -} - -void -lnet_init_rtrpools(void) -{ - int small_pages = 1; - int large_pages = (LNET_MTU + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; - - lnet_rtrpool_init(&the_lnet.ln_rtrpools[0], 0); - lnet_rtrpool_init(&the_lnet.ln_rtrpools[1], small_pages); - lnet_rtrpool_init(&the_lnet.ln_rtrpools[2], large_pages); -} - - -int -lnet_alloc_rtrpools(int im_a_router) -{ - int rc; - - if (!strcmp(forwarding, "")) { - /* not set either way */ - if (!im_a_router) - return 0; - } else if (!strcmp(forwarding, "disabled")) { - /* explicitly disabled */ - return 0; - } else if (!strcmp(forwarding, "enabled")) { - /* explicitly enabled */ - } else { - 
LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either " - "'enabled' or 'disabled'\n"); - return -EINVAL; - } - - if (tiny_router_buffers <= 0) { - LCONSOLE_ERROR_MSG(0x10c, "tiny_router_buffers=%d invalid when " - "routing enabled\n", tiny_router_buffers); - rc = -EINVAL; - goto failed; - } - - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[0], - tiny_router_buffers); - if (rc != 0) - goto failed; - - if (small_router_buffers <= 0) { - LCONSOLE_ERROR_MSG(0x10d, "small_router_buffers=%d invalid when" - " routing enabled\n", small_router_buffers); - rc = -EINVAL; - goto failed; - } - - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[1], - small_router_buffers); - if (rc != 0) - goto failed; - - if (large_router_buffers <= 0) { - LCONSOLE_ERROR_MSG(0x10e, "large_router_buffers=%d invalid when" - " routing enabled\n", large_router_buffers); - rc = -EINVAL; - goto failed; - } - - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[2], - large_router_buffers); - if (rc != 0) - goto failed; - - LNET_LOCK(); - the_lnet.ln_routing = 1; - LNET_UNLOCK(); - - return 0; - - failed: - lnet_free_rtrpools(); - return rc; -} - -#else - -int -lnet_peers_start_down(void) -{ - return 0; -} - -void -lnet_router_checker_stop(void) -{ - return; -} - -int -lnet_router_checker_start(void) -{ - return 0; -} - -void -lnet_free_rtrpools (void) -{ -} - -void -lnet_init_rtrpools (void) -{ -} - -int -lnet_alloc_rtrpools (int im_a_arouter) -{ - return 0; -} - -#endif diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c deleted file mode 100644 index a2a1221175ccf83edbcdf8446c26e5b9fb162968..0000000000000000000000000000000000000000 --- a/lnet/lnet/router_proc.c +++ /dev/null @@ -1,1095 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <libcfs/libcfs.h> -#include <lnet/lib-lnet.h> - -#if defined(__KERNEL__) && defined(LNET_ROUTER) - -#include <linux/seq_file.h> -#include <linux/lustre_compat25.h> - -/* this is really lnet_proc.c */ - -#define LNET_PROC_STATS "sys/lnet/stats" -#define LNET_PROC_ROUTES "sys/lnet/routes" -#define LNET_PROC_ROUTERS "sys/lnet/routers" -#define LNET_PROC_PEERS "sys/lnet/peers" -#define LNET_PROC_BUFFERS "sys/lnet/buffers" -#define LNET_PROC_NIS "sys/lnet/nis" - -static int -lnet_router_proc_stats_read (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - lnet_counters_t *ctrs; - int rc; - - *start = page; - *eof = 1; - if (off != 0) - return 0; - - LIBCFS_ALLOC(ctrs, sizeof(*ctrs)); - if (ctrs == NULL) - return -ENOMEM; - - LNET_LOCK(); - *ctrs = the_lnet.ln_counters; - LNET_UNLOCK(); - - rc = sprintf(page, - "%u %u %u %u %u %u %u "LPU64" "LPU64" "LPU64" "LPU64"\n", - ctrs->msgs_alloc, ctrs->msgs_max, - ctrs->errors, - ctrs->send_count, ctrs->recv_count, - ctrs->route_count, ctrs->drop_count, - ctrs->send_length, ctrs->recv_length, - ctrs->route_length, ctrs->drop_length); - - LIBCFS_FREE(ctrs, sizeof(*ctrs)); - return rc; -} - -static int -lnet_router_proc_stats_write(struct file 
*file, const char *ubuffer, - unsigned long count, void *data) -{ - LNET_LOCK(); - memset(&the_lnet.ln_counters, 0, sizeof(the_lnet.ln_counters)); - LNET_UNLOCK(); - - return (count); -} - -typedef struct { - __u64 lrsi_version; - lnet_remotenet_t *lrsi_net; - lnet_route_t *lrsi_route; - loff_t lrsi_off; -} lnet_route_seq_iterator_t; - -int -lnet_route_seq_seek (lnet_route_seq_iterator_t *lrsi, loff_t off) -{ - struct list_head *n; - struct list_head *r; - int rc; - loff_t here; - - if (off == 0) { - lrsi->lrsi_net = NULL; - lrsi->lrsi_route = NULL; - lrsi->lrsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lrsi->lrsi_net != NULL && - lrsi->lrsi_version != the_lnet.ln_remote_nets_version) { - /* tables have changed */ - rc = -ESTALE; - goto out; - } - - if (lrsi->lrsi_net == NULL || lrsi->lrsi_off > off) { - /* search from start */ - n = the_lnet.ln_remote_nets.next; - r = NULL; - here = 1; - } else { - /* continue search */ - n = &lrsi->lrsi_net->lrn_list; - r = &lrsi->lrsi_route->lr_list; - here = lrsi->lrsi_off; - } - - lrsi->lrsi_version = the_lnet.ln_remote_nets_version; - lrsi->lrsi_off = off; - - while (n != &the_lnet.ln_remote_nets) { - lnet_remotenet_t *rnet = - list_entry(n, lnet_remotenet_t, lrn_list); - - if (r == NULL) - r = rnet->lrn_routes.next; - - while (r != &rnet->lrn_routes) { - lnet_route_t *re = - list_entry(r, lnet_route_t, - lr_list); - - if (here == off) { - lrsi->lrsi_net = rnet; - lrsi->lrsi_route = re; - rc = 0; - goto out; - } - - r = r->next; - here++; - } - - r = NULL; - n = n->next; - } - - lrsi->lrsi_net = NULL; - lrsi->lrsi_route = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_route_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_route_seq_iterator_t *lrsi; - int rc; - - LIBCFS_ALLOC(lrsi, sizeof(*lrsi)); - if (lrsi == NULL) - return NULL; - - lrsi->lrsi_net = NULL; - rc = lnet_route_seq_seek(lrsi, *pos); - if (rc == 0) - return lrsi; - - LIBCFS_FREE(lrsi, sizeof(*lrsi)); - return 
NULL; -} - -static void -lnet_route_seq_stop (struct seq_file *s, void *iter) -{ - lnet_route_seq_iterator_t *lrsi = iter; - - if (lrsi != NULL) - LIBCFS_FREE(lrsi, sizeof(*lrsi)); -} - -static void * -lnet_route_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_route_seq_iterator_t *lrsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_route_seq_seek(lrsi, next); - if (rc != 0) { - LIBCFS_FREE(lrsi, sizeof(*lrsi)); - return NULL; - } - - *pos = next; - return lrsi; -} - -static int -lnet_route_seq_show (struct seq_file *s, void *iter) -{ - lnet_route_seq_iterator_t *lrsi = iter; - __u32 net; - unsigned int hops; - lnet_nid_t nid; - int alive; - - if (lrsi->lrsi_off == 0) { - seq_printf(s, "Routing %s\n", - the_lnet.ln_routing ? "enabled" : "disabled"); - seq_printf(s, "%-8s %4s %7s %s\n", - "net", "hops", "state", "router"); - return 0; - } - - LASSERT (lrsi->lrsi_net != NULL); - LASSERT (lrsi->lrsi_route != NULL); - - LNET_LOCK(); - - if (lrsi->lrsi_version != the_lnet.ln_remote_nets_version) { - LNET_UNLOCK(); - return -ESTALE; - } - - net = lrsi->lrsi_net->lrn_net; - hops = lrsi->lrsi_net->lrn_hops; - nid = lrsi->lrsi_route->lr_gateway->lp_nid; - alive = lrsi->lrsi_route->lr_gateway->lp_alive; - - LNET_UNLOCK(); - - seq_printf(s, "%-8s %4u %7s %s\n", libcfs_net2str(net), hops, - alive ? 
"up" : "down", libcfs_nid2str(nid)); - return 0; -} - -static struct seq_operations lnet_routes_sops = { - .start = lnet_route_seq_start, - .stop = lnet_route_seq_stop, - .next = lnet_route_seq_next, - .show = lnet_route_seq_show, -}; - -static int -lnet_route_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_routes_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_routes_fops = { - .owner = THIS_MODULE, - .open = lnet_route_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - __u64 lrtrsi_version; - lnet_peer_t *lrtrsi_router; - loff_t lrtrsi_off; -} lnet_router_seq_iterator_t; - -int -lnet_router_seq_seek (lnet_router_seq_iterator_t *lrtrsi, loff_t off) -{ - struct list_head *r; - lnet_peer_t *lp; - int rc; - loff_t here; - - if (off == 0) { - lrtrsi->lrtrsi_router = NULL; - lrtrsi->lrtrsi_off = 0; - return 0; - } - - LNET_LOCK(); - - lp = lrtrsi->lrtrsi_router; - - if (lp != NULL && - lrtrsi->lrtrsi_version != the_lnet.ln_routers_version) { - /* tables have changed */ - rc = -ESTALE; - goto out; - } - - if (lp == NULL || lrtrsi->lrtrsi_off > off) { - /* search from start */ - r = the_lnet.ln_routers.next; - here = 1; - } else { - /* continue search */ - r = &lp->lp_rtr_list; - here = lrtrsi->lrtrsi_off; - } - - lrtrsi->lrtrsi_version = the_lnet.ln_routers_version; - lrtrsi->lrtrsi_off = off; - - while (r != &the_lnet.ln_routers) { - lnet_peer_t *rtr = list_entry(r, - lnet_peer_t, - lp_rtr_list); - - if (here == off) { - lrtrsi->lrtrsi_router = rtr; - rc = 0; - goto out; - } - - r = r->next; - here++; - } - - lrtrsi->lrtrsi_router = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_router_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_router_seq_iterator_t *lrtrsi; - int rc; - - 
LIBCFS_ALLOC(lrtrsi, sizeof(*lrtrsi)); - if (lrtrsi == NULL) - return NULL; - - lrtrsi->lrtrsi_router = NULL; - rc = lnet_router_seq_seek(lrtrsi, *pos); - if (rc == 0) - return lrtrsi; - - LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi)); - return NULL; -} - -static void -lnet_router_seq_stop (struct seq_file *s, void *iter) -{ - lnet_router_seq_iterator_t *lrtrsi = iter; - - if (lrtrsi != NULL) - LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi)); -} - -static void * -lnet_router_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_router_seq_iterator_t *lrtrsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_router_seq_seek(lrtrsi, next); - if (rc != 0) { - LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi)); - return NULL; - } - - *pos = next; - return lrtrsi; -} - -static int -lnet_router_seq_show (struct seq_file *s, void *iter) -{ - lnet_router_seq_iterator_t *lrtrsi = iter; - lnet_peer_t *lp; - lnet_nid_t nid; - int alive; - int alive_cnt; - int nrefs; - int nrtrrefs; - time_t last_ping; - - if (lrtrsi->lrtrsi_off == 0) { - seq_printf(s, "%-4s %7s %9s %6s %12s %s\n", - "ref", "rtr_ref", "alive_cnt", "state", "last_ping", "router"); - return 0; - } - - lp = lrtrsi->lrtrsi_router; - LASSERT (lp != NULL); - - LNET_LOCK(); - - if (lrtrsi->lrtrsi_version != the_lnet.ln_routers_version) { - LNET_UNLOCK(); - return -ESTALE; - } - - nid = lp->lp_nid; - alive = lp->lp_alive; - alive_cnt = lp->lp_alive_count; - nrefs = lp->lp_refcount; - nrtrrefs = lp->lp_rtr_refcount; - last_ping = lp->lp_ping_timestamp; - - LNET_UNLOCK(); - - seq_printf(s, - "%-4d %7d %9d %6s %12lu %s\n", nrefs, nrtrrefs, - alive_cnt, alive ? 
"up" : "down", - last_ping, libcfs_nid2str(nid)); - return 0; -} - -static struct seq_operations lnet_routers_sops = { - .start = lnet_router_seq_start, - .stop = lnet_router_seq_stop, - .next = lnet_router_seq_next, - .show = lnet_router_seq_show, -}; - -static int -lnet_router_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_routers_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_routers_fops = { - .owner = THIS_MODULE, - .open = lnet_router_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - unsigned long long lpsi_version; - int lpsi_idx; - lnet_peer_t *lpsi_peer; - loff_t lpsi_off; -} lnet_peer_seq_iterator_t; - -int -lnet_peer_seq_seek (lnet_peer_seq_iterator_t *lpsi, loff_t off) -{ - int idx; - struct list_head *p; - loff_t here; - int rc; - - if (off == 0) { - lpsi->lpsi_idx = 0; - lpsi->lpsi_peer = NULL; - lpsi->lpsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lpsi->lpsi_peer != NULL && - lpsi->lpsi_version != the_lnet.ln_peertable_version) { - /* tables have changed */ - rc = -ESTALE; - goto out; - } - - if (lpsi->lpsi_peer == NULL || - lpsi->lpsi_off > off) { - /* search from start */ - idx = 0; - p = NULL; - here = 1; - } else { - /* continue search */ - idx = lpsi->lpsi_idx; - p = &lpsi->lpsi_peer->lp_hashlist; - here = lpsi->lpsi_off; - } - - lpsi->lpsi_version = the_lnet.ln_peertable_version; - lpsi->lpsi_off = off; - - while (idx < LNET_PEER_HASHSIZE) { - if (p == NULL) - p = the_lnet.ln_peer_hash[idx].next; - - while (p != &the_lnet.ln_peer_hash[idx]) { - lnet_peer_t *lp = list_entry(p, lnet_peer_t, - lp_hashlist); - - if (here == off) { - lpsi->lpsi_idx = idx; - lpsi->lpsi_peer = lp; - rc = 0; - goto out; - } - - here++; - p = lp->lp_hashlist.next; - } - - p = NULL; - idx++; - } - - 
lpsi->lpsi_idx = 0; - lpsi->lpsi_peer = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_peer_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_peer_seq_iterator_t *lpsi; - int rc; - - LIBCFS_ALLOC(lpsi, sizeof(*lpsi)); - if (lpsi == NULL) - return NULL; - - lpsi->lpsi_idx = 0; - lpsi->lpsi_peer = NULL; - rc = lnet_peer_seq_seek(lpsi, *pos); - if (rc == 0) - return lpsi; - - LIBCFS_FREE(lpsi, sizeof(*lpsi)); - return NULL; -} - -static void -lnet_peer_seq_stop (struct seq_file *s, void *iter) -{ - lnet_peer_seq_iterator_t *lpsi = iter; - - if (lpsi != NULL) - LIBCFS_FREE(lpsi, sizeof(*lpsi)); -} - -static void * -lnet_peer_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_peer_seq_iterator_t *lpsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_peer_seq_seek(lpsi, next); - if (rc != 0) { - LIBCFS_FREE(lpsi, sizeof(*lpsi)); - return NULL; - } - - *pos = next; - return lpsi; -} - -static int -lnet_peer_seq_show (struct seq_file *s, void *iter) -{ - lnet_peer_seq_iterator_t *lpsi = iter; - lnet_peer_t *lp; - lnet_nid_t nid; - int maxcr; - int mintxcr; - int txcr; - int minrtrcr; - int rtrcr; - int alive; - int rtr; - int txqnob; - int nrefs; - - if (lpsi->lpsi_off == 0) { - seq_printf(s, "%-24s %4s %5s %5s %5s %5s %5s %5s %s\n", - "nid", "refs", "state", "max", - "rtr", "min", "tx", "min", "queue"); - return 0; - } - - LASSERT (lpsi->lpsi_peer != NULL); - - LNET_LOCK(); - - if (lpsi->lpsi_version != the_lnet.ln_peertable_version) { - LNET_UNLOCK(); - return -ESTALE; - } - - lp = lpsi->lpsi_peer; - - nid = lp->lp_nid; - maxcr = lp->lp_ni->ni_peertxcredits; - txcr = lp->lp_txcredits; - mintxcr = lp->lp_mintxcredits; - rtrcr = lp->lp_rtrcredits; - minrtrcr = lp->lp_minrtrcredits; - rtr = lnet_isrouter(lp); - alive = lp->lp_alive; - txqnob = lp->lp_txqnob; - nrefs = lp->lp_refcount; - - LNET_UNLOCK(); - - seq_printf(s, "%-24s %4d %5s %5d %5d %5d %5d %5d %d\n", - libcfs_nid2str(nid), nrefs, - !rtr ? 
"~rtr" : (alive ? "up" : "down"), - maxcr, rtrcr, minrtrcr, txcr, mintxcr, txqnob); - return 0; -} - -static struct seq_operations lnet_peer_sops = { - .start = lnet_peer_seq_start, - .stop = lnet_peer_seq_stop, - .next = lnet_peer_seq_next, - .show = lnet_peer_seq_show, -}; - -static int -lnet_peer_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_peer_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_peer_fops = { - .owner = THIS_MODULE, - .open = lnet_peer_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - int lbsi_idx; - loff_t lbsi_off; -} lnet_buffer_seq_iterator_t; - -int -lnet_buffer_seq_seek (lnet_buffer_seq_iterator_t *lbsi, loff_t off) -{ - int idx; - loff_t here; - int rc; - - if (off == 0) { - lbsi->lbsi_idx = -1; - lbsi->lbsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lbsi->lbsi_idx < 0 || - lbsi->lbsi_off > off) { - /* search from start */ - idx = 0; - here = 1; - } else { - /* continue search */ - idx = lbsi->lbsi_idx; - here = lbsi->lbsi_off; - } - - lbsi->lbsi_off = off; - - while (idx < LNET_NRBPOOLS) { - if (here == off) { - lbsi->lbsi_idx = idx; - rc = 0; - goto out; - } - here++; - idx++; - } - - lbsi->lbsi_idx = -1; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_buffer_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_buffer_seq_iterator_t *lbsi; - int rc; - - LIBCFS_ALLOC(lbsi, sizeof(*lbsi)); - if (lbsi == NULL) - return NULL; - - lbsi->lbsi_idx = -1; - rc = lnet_buffer_seq_seek(lbsi, *pos); - if (rc == 0) - return lbsi; - - LIBCFS_FREE(lbsi, sizeof(*lbsi)); - return NULL; -} - -static void -lnet_buffer_seq_stop (struct seq_file *s, void *iter) -{ - lnet_buffer_seq_iterator_t *lbsi = iter; - - if (lbsi != NULL) - LIBCFS_FREE(lbsi, sizeof(*lbsi)); -} - 
-static void * -lnet_buffer_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_buffer_seq_iterator_t *lbsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_buffer_seq_seek(lbsi, next); - if (rc != 0) { - LIBCFS_FREE(lbsi, sizeof(*lbsi)); - return NULL; - } - - *pos = next; - return lbsi; -} - -static int -lnet_buffer_seq_show (struct seq_file *s, void *iter) -{ - lnet_buffer_seq_iterator_t *lbsi = iter; - lnet_rtrbufpool_t *rbp; - int npages; - int nbuf; - int cr; - int mincr; - - if (lbsi->lbsi_off == 0) { - seq_printf(s, "%5s %5s %7s %7s\n", - "pages", "count", "credits", "min"); - return 0; - } - - LASSERT (lbsi->lbsi_idx >= 0 && lbsi->lbsi_idx < LNET_NRBPOOLS); - - LNET_LOCK(); - - rbp = &the_lnet.ln_rtrpools[lbsi->lbsi_idx]; - - npages = rbp->rbp_npages; - nbuf = rbp->rbp_nbuffers; - cr = rbp->rbp_credits; - mincr = rbp->rbp_mincredits; - - LNET_UNLOCK(); - - seq_printf(s, "%5d %5d %7d %7d\n", - npages, nbuf, cr, mincr); - return 0; -} - -static struct seq_operations lnet_buffer_sops = { - .start = lnet_buffer_seq_start, - .stop = lnet_buffer_seq_stop, - .next = lnet_buffer_seq_next, - .show = lnet_buffer_seq_show, -}; - -static int -lnet_buffer_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_buffer_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_buffers_fops = { - .owner = THIS_MODULE, - .open = lnet_buffer_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - lnet_ni_t *lnsi_ni; - loff_t lnsi_off; -} lnet_ni_seq_iterator_t; - -int -lnet_ni_seq_seek (lnet_ni_seq_iterator_t *lnsi, loff_t off) -{ - struct list_head *n; - loff_t here; - int rc; - - if (off == 0) { - lnsi->lnsi_ni = NULL; - lnsi->lnsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lnsi->lnsi_ni == NULL || - lnsi->lnsi_off > 
off) { - /* search from start */ - n = NULL; - here = 1; - } else { - /* continue search */ - n = &lnsi->lnsi_ni->ni_list; - here = lnsi->lnsi_off; - } - - lnsi->lnsi_off = off; - - if (n == NULL) - n = the_lnet.ln_nis.next; - - while (n != &the_lnet.ln_nis) { - if (here == off) { - lnsi->lnsi_ni = list_entry(n, lnet_ni_t, ni_list); - rc = 0; - goto out; - } - here++; - n = n->next; - } - - lnsi->lnsi_ni = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_ni_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_ni_seq_iterator_t *lnsi; - int rc; - - LIBCFS_ALLOC(lnsi, sizeof(*lnsi)); - if (lnsi == NULL) - return NULL; - - lnsi->lnsi_ni = NULL; - rc = lnet_ni_seq_seek(lnsi, *pos); - if (rc == 0) - return lnsi; - - LIBCFS_FREE(lnsi, sizeof(*lnsi)); - return NULL; -} - -static void -lnet_ni_seq_stop (struct seq_file *s, void *iter) -{ - lnet_ni_seq_iterator_t *lnsi = iter; - - if (lnsi != NULL) - LIBCFS_FREE(lnsi, sizeof(*lnsi)); -} - -static void * -lnet_ni_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_ni_seq_iterator_t *lnsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_ni_seq_seek(lnsi, next); - if (rc != 0) { - LIBCFS_FREE(lnsi, sizeof(*lnsi)); - return NULL; - } - - *pos = next; - return lnsi; -} - -static int -lnet_ni_seq_show (struct seq_file *s, void *iter) -{ - lnet_ni_seq_iterator_t *lnsi = iter; - lnet_ni_t *ni; - int maxtxcr; - int txcr; - int mintxcr; - int npeertxcr; - lnet_nid_t nid; - int nref; - - if (lnsi->lnsi_off == 0) { - seq_printf(s, "%-24s %4s %4s %5s %5s %5s\n", - "nid", "refs", "peer", "max", "tx", "min"); - return 0; - } - - LASSERT (lnsi->lnsi_ni != NULL); - - LNET_LOCK(); - - ni = lnsi->lnsi_ni; - - maxtxcr = ni->ni_maxtxcredits; - txcr = ni->ni_txcredits; - mintxcr = ni->ni_mintxcredits; - npeertxcr = ni->ni_peertxcredits; - nid = ni->ni_nid; - nref = ni->ni_refcount; - - LNET_UNLOCK(); - - seq_printf(s, "%-24s %4d %4d %5d %5d %5d\n", - libcfs_nid2str(nid), nref, - 
npeertxcr, maxtxcr, txcr, mintxcr); - return 0; -} - -static struct seq_operations lnet_ni_sops = { - .start = lnet_ni_seq_start, - .stop = lnet_ni_seq_stop, - .next = lnet_ni_seq_next, - .show = lnet_ni_seq_show, -}; - -static int -lnet_ni_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_ni_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_ni_fops = { - .owner = THIS_MODULE, - .open = lnet_ni_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -void -lnet_proc_init(void) -{ - struct proc_dir_entry *pde; - - /* Initialize LNET_PROC_STATS */ - pde = create_proc_entry (LNET_PROC_STATS, 0644, NULL); - if (pde == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_STATS); - return; - } - - pde->data = NULL; - pde->read_proc = lnet_router_proc_stats_read; - pde->write_proc = lnet_router_proc_stats_write; - - /* Initialize LNET_PROC_ROUTES */ - pde = create_proc_entry (LNET_PROC_ROUTES, 0444, NULL); - if (pde == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_ROUTES); - return; - } - - pde->proc_fops = &lnet_routes_fops; - pde->data = NULL; - - /* Initialize LNET_PROC_ROUTERS */ - pde = create_proc_entry (LNET_PROC_ROUTERS, 0444, NULL); - if (pde == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_ROUTERS); - return; - } - - pde->proc_fops = &lnet_routers_fops; - pde->data = NULL; - - /* Initialize LNET_PROC_PEERS */ - pde = create_proc_entry (LNET_PROC_PEERS, 0444, NULL); - if (pde == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_PEERS); - return; - } - - pde->proc_fops = &lnet_peer_fops; - pde->data = NULL; - - /* Initialize LNET_PROC_BUFFERS */ - pde = create_proc_entry (LNET_PROC_BUFFERS, 0444, NULL); - if (pde == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_BUFFERS); - 
return; - } - - pde->proc_fops = &lnet_buffers_fops; - pde->data = NULL; - - /* Initialize LNET_PROC_NIS */ - pde = create_proc_entry (LNET_PROC_NIS, 0444, NULL); - if (pde == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_NIS); - return; - } - - pde->proc_fops = &lnet_ni_fops; - pde->data = NULL; -} - -void -lnet_proc_fini(void) -{ - remove_proc_entry(LNET_PROC_STATS, 0); - remove_proc_entry(LNET_PROC_ROUTES, 0); - remove_proc_entry(LNET_PROC_ROUTERS, 0); - remove_proc_entry(LNET_PROC_PEERS, 0); - remove_proc_entry(LNET_PROC_BUFFERS, 0); - remove_proc_entry(LNET_PROC_NIS, 0); -} - -#else - -void -lnet_proc_init(void) -{ -} - -void -lnet_proc_fini(void) -{ -} - -#endif diff --git a/lnet/router/.cvsignore b/lnet/router/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/router/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/selftest/.cvsignore b/lnet/selftest/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/selftest/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/selftest/Makefile.in b/lnet/selftest/Makefile.in deleted file mode 100644 index 8ebef7597554c11b5f6cbb8b77d9f50b21b600a3..0000000000000000000000000000000000000000 --- a/lnet/selftest/Makefile.in +++ /dev/null @@ -1,7 +0,0 @@ -MODULES := lnet_selftest - -lnet_selftest-objs := console.o conrpc.o conctl.o framework.o timer.o rpc.o workitem.o module.o ping_test.o brw_test.o - -default: all - -@INCLUDE_RULES@ diff --git a/lnet/selftest/autoMakefile.am b/lnet/selftest/autoMakefile.am deleted file mode 100644 index 36af9014125ae296f912761109086d9095975a3d..0000000000000000000000000000000000000000 --- 
a/lnet/selftest/autoMakefile.am +++ /dev/null @@ -1,23 +0,0 @@ -my_sources = console.c conrpc.c conctl.c console.h conrpc.h \ - framework.c timer.c rpc.c workitem.c module.c \ - ping_test.c brw_test.c - -if LIBLUSTRE -noinst_LIBRARIES= libselftest.a -libselftest_a_SOURCES= $(my_sources) -libselftest_a_CPPFLAGS = $(LLCPPFLAGS) -libselftest_a_CFLAGS = $(LLCFLAGS) -endif - -if MODULES - -if LINUX -modulenet_DATA = lnet_selftest$(KMODEXT) -endif # LINUX - -endif # MODULES - -install-data-hook: $(install_data_hook) - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ selftest -DIST_SOURCES = $(lnet_selftest-objs:%.o=%.c) console.h conrpc.h rpc.h selftest.h timer.h diff --git a/lnet/selftest/brw_test.c b/lnet/selftest/brw_test.c deleted file mode 100644 index 0d6ec0fccd47360cfb762e49819ff9cf20744806..0000000000000000000000000000000000000000 --- a/lnet/selftest/brw_test.c +++ /dev/null @@ -1,400 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * Author: Isaac Huang <isaac@clusterfs.com> - * - */ - -#include "selftest.h" - - -extern int brw_inject_errors; - -static void -brw_client_fini (sfw_test_instance_t *tsi) -{ - srpc_bulk_t *bulk; - sfw_test_unit_t *tsu; - - LASSERT (tsi->tsi_is_client); - - list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) { - bulk = tsu->tsu_private; - if (bulk == NULL) continue; - - srpc_free_bulk(bulk); - tsu->tsu_private = NULL; - } -} - -int -brw_client_init (sfw_test_instance_t *tsi) -{ - test_bulk_req_t *breq = &tsi->tsi_u.bulk; - int flags = breq->blk_flags; - int npg = breq->blk_npg; - srpc_bulk_t *bulk; - sfw_test_unit_t *tsu; - - LASSERT (tsi->tsi_is_client); - - if (npg > LNET_MAX_IOV || npg <= 0) - return -EINVAL; - - if (breq->blk_opc != LST_BRW_READ && breq->blk_opc != LST_BRW_WRITE) - return -EINVAL; - - if (flags != LST_BRW_CHECK_NONE && - flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE) - return -EINVAL; - - list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) { - bulk = srpc_alloc_bulk(npg, breq->blk_opc == LST_BRW_READ); - if (bulk == NULL) { - brw_client_fini(tsi); - return -ENOMEM; - } - - tsu->tsu_private = bulk; - } - - return 0; -} - -#define BRW_POISON 0xbeefbeefbeefbeefULL -#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL -#define BRW_MSIZE sizeof(__u64) - -int -brw_inject_one_error (void) -{ - struct timeval tv; - - if (brw_inject_errors <= 0) return 0; - -#ifndef __KERNEL__ - gettimeofday(&tv, NULL); -#else - do_gettimeofday(&tv); -#endif - - if ((tv.tv_usec & 1) == 0) return 0; - - return brw_inject_errors--; -} - -void -brw_fill_page (cfs_page_t *pg, int pattern, __u64 magic) -{ - char *addr = cfs_page_address(pg); - int i; - - LASSERT (addr != NULL); - - if (pattern == LST_BRW_CHECK_NONE) return; - - if (magic == BRW_MAGIC) - magic += brw_inject_one_error(); - - if (pattern == LST_BRW_CHECK_SIMPLE) { - memcpy(addr, &magic, BRW_MSIZE); - addr += CFS_PAGE_SIZE - BRW_MSIZE; - memcpy(addr, &magic, BRW_MSIZE); - return; - } - - if (pattern == 
LST_BRW_CHECK_FULL) { - for (i = 0; i < CFS_PAGE_SIZE / BRW_MSIZE; i++) - memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE); - return; - } - - LBUG (); - return; -} - -int -brw_check_page (cfs_page_t *pg, int pattern, __u64 magic) -{ - char *addr = cfs_page_address(pg); - __u64 data; - int i; - - LASSERT (addr != NULL); - - if (pattern == LST_BRW_CHECK_NONE) - return 0; - - if (pattern == LST_BRW_CHECK_SIMPLE) { - data = *((__u64 *) addr); - if (data != magic) goto bad_data; - - addr += CFS_PAGE_SIZE - BRW_MSIZE; - data = *((__u64 *) addr); - if (data != magic) goto bad_data; - - return 0; - } - - if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < CFS_PAGE_SIZE / BRW_MSIZE; i++) { - data = *(((__u64 *) addr) + i); - if (data != magic) goto bad_data; - } - - return 0; - } - - LBUG (); - -bad_data: - CERROR ("Bad data in page %p: "LPX64", "LPX64" expected\n", - pg, data, magic); - return 1; -} - -void -brw_fill_bulk (srpc_bulk_t *bk, int pattern, __u64 magic) -{ - int i; - cfs_page_t *pg; - - for (i = 0; i < bk->bk_niov; i++) { -#ifdef __KERNEL__ - pg = bk->bk_iovs[i].kiov_page; -#else - LASSERT (bk->bk_pages != NULL); - pg = bk->bk_pages[i]; -#endif - brw_fill_page(pg, pattern, magic); - } -} - -int -brw_check_bulk (srpc_bulk_t *bk, int pattern, __u64 magic) -{ - int i; - cfs_page_t *pg; - - for (i = 0; i < bk->bk_niov; i++) { -#ifdef __KERNEL__ - pg = bk->bk_iovs[i].kiov_page; -#else - LASSERT (bk->bk_pages != NULL); - pg = bk->bk_pages[i]; -#endif - if (brw_check_page(pg, pattern, magic) != 0) { - CERROR ("Bulk page %p (%d/%d) is corrupted!\n", - pg, i, bk->bk_niov); - return 1; - } - } - - return 0; -} - -static int -brw_client_prep_rpc (sfw_test_unit_t *tsu, - lnet_process_id_t dest, srpc_client_rpc_t **rpcpp) -{ - srpc_bulk_t *bulk = tsu->tsu_private; - sfw_test_instance_t *tsi = tsu->tsu_instance; - test_bulk_req_t *breq = &tsi->tsi_u.bulk; - int npg = breq->blk_npg; - int flags = breq->blk_flags; - srpc_client_rpc_t *rpc; - srpc_brw_reqst_t *req; - int rc; 
- - LASSERT (bulk != NULL); - LASSERT (bulk->bk_niov == npg); - - rc = sfw_create_test_rpc(tsu, dest, npg, npg * CFS_PAGE_SIZE, &rpc); - if (rc != 0) return rc; - - memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg])); - if (breq->blk_opc == LST_BRW_WRITE) - brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC); - else - brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON); - - req = &rpc->crpc_reqstmsg.msg_body.brw_reqst; - req->brw_flags = flags; - req->brw_rw = breq->blk_opc; - req->brw_len = npg * CFS_PAGE_SIZE; - - *rpcpp = rpc; - return 0; -} - -static void -brw_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) -{ - __u64 magic = BRW_MAGIC; - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_session_t *sn = tsi->tsi_batch->bat_session; - srpc_msg_t *msg = &rpc->crpc_replymsg; - srpc_brw_reply_t *reply = &msg->msg_body.brw_reply; - srpc_brw_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst; - - LASSERT (sn != NULL); - - if (rpc->crpc_status != 0) { - CERROR ("BRW RPC to %s failed with %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_status); - if (!tsi->tsi_stopping) /* rpc could have been aborted */ - atomic_inc(&sn->sn_brw_errors); - goto out; - } - - if (msg->msg_magic != SRPC_MSG_MAGIC) { - __swab64s(&magic); - __swab32s(&reply->brw_status); - } - - CDEBUG (reply->brw_status ? 
D_WARNING : D_NET, - "BRW RPC to %s finished with brw_status: %d\n", - libcfs_id2str(rpc->crpc_dest), reply->brw_status); - - if (reply->brw_status != 0) { - atomic_inc(&sn->sn_brw_errors); - rpc->crpc_status = -reply->brw_status; - goto out; - } - - if (reqst->brw_rw == LST_BRW_WRITE) goto out; - - if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic) != 0) { - CERROR ("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->crpc_dest)); - atomic_inc(&sn->sn_brw_errors); - rpc->crpc_status = -EBADMSG; - } - -out: -#ifndef __KERNEL__ - rpc->crpc_bulk.bk_pages = NULL; -#endif - return; -} - -void -brw_server_rpc_done (srpc_server_rpc_t *rpc) -{ - srpc_bulk_t *blk = rpc->srpc_bulk; - - if (blk == NULL) return; - - if (rpc->srpc_status != 0) - CERROR ("Bulk transfer %s %s has failed: %d\n", - blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer), rpc->srpc_status); - else - CDEBUG (D_NET, "Transfered %d pages bulk data %s %s\n", - blk->bk_niov, blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer)); - - sfw_free_pages(rpc); -} - -int -brw_bulk_ready (srpc_server_rpc_t *rpc, int status) -{ - __u64 magic = BRW_MAGIC; - srpc_brw_reply_t *reply = &rpc->srpc_replymsg.msg_body.brw_reply; - srpc_brw_reqst_t *reqst; - srpc_msg_t *reqstmsg; - - LASSERT (rpc->srpc_bulk != NULL); - LASSERT (rpc->srpc_reqstbuf != NULL); - - reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - reqst = &reqstmsg->msg_body.brw_reqst; - - if (status != 0) { - CERROR ("BRW bulk %s failed for RPC from %s: %d\n", - reqst->brw_rw == LST_BRW_READ ? 
"READ" : "WRITE", - libcfs_id2str(rpc->srpc_peer), status); - return -EIO; - } - - if (reqst->brw_rw == LST_BRW_READ) - return 0; - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) - __swab64s(&magic); - - if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic) != 0) { - CERROR ("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->srpc_peer)); - reply->brw_status = EBADMSG; - } - - return 0; -} - -int -brw_server_handle (srpc_server_rpc_t *rpc) -{ - srpc_service_t *sv = rpc->srpc_service; - srpc_msg_t *replymsg = &rpc->srpc_replymsg; - srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - srpc_brw_reply_t *reply = &replymsg->msg_body.brw_reply; - srpc_brw_reqst_t *reqst = &reqstmsg->msg_body.brw_reqst; - int rc; - - LASSERT (sv->sv_id == SRPC_SERVICE_BRW); - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) { - LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - __swab32s(&reqstmsg->msg_type); - __swab32s(&reqst->brw_rw); - __swab32s(&reqst->brw_len); - __swab32s(&reqst->brw_flags); - __swab64s(&reqst->brw_rpyid); - __swab64s(&reqst->brw_bulkid); - } - LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id)); - - rpc->srpc_done = brw_server_rpc_done; - - if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) || - reqst->brw_len == 0 || (reqst->brw_len & ~CFS_PAGE_MASK) != 0 || - reqst->brw_len / CFS_PAGE_SIZE > LNET_MAX_IOV || - (reqst->brw_flags != LST_BRW_CHECK_NONE && - reqst->brw_flags != LST_BRW_CHECK_FULL && - reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) { - reply->brw_status = EINVAL; - return 0; - } - - reply->brw_status = 0; - rc = sfw_alloc_pages(rpc, reqst->brw_len / CFS_PAGE_SIZE, - reqst->brw_rw == LST_BRW_WRITE); - if (rc != 0) return rc; - - if (reqst->brw_rw == LST_BRW_READ) - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC); - else - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON); - - return 0; -} - -sfw_test_client_ops_t brw_test_client = -{ - .tso_init = brw_client_init, - .tso_fini = 
brw_client_fini, - .tso_prep_rpc = brw_client_prep_rpc, - .tso_done_rpc = brw_client_done_rpc, -}; - -srpc_service_t brw_test_service = -{ - .sv_name = "brw test", - .sv_handler = brw_server_handle, - .sv_bulk_ready = brw_bulk_ready, - .sv_id = SRPC_SERVICE_BRW, -}; diff --git a/lnet/selftest/conctl.c b/lnet/selftest/conctl.c deleted file mode 100644 index a6fc03189c95dc0445ac94bbcf5d5676de83079b..0000000000000000000000000000000000000000 --- a/lnet/selftest/conctl.c +++ /dev/null @@ -1,885 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * IOC handle in kernel - */ -#ifdef __KERNEL__ - -#include <libcfs/libcfs.h> -#include <lnet/lib-lnet.h> -#include <lnet/lnetst.h> -#include "console.h" - -int -lst_session_new_ioctl(lstio_session_new_args_t *args) -{ - char *name; - int rc; - - if (args->lstio_ses_idp == NULL || /* address for output sid */ - args->lstio_ses_key == 0 || /* no key is specified */ - args->lstio_ses_namep == NULL || /* session name */ - args->lstio_ses_nmlen <= 0 || - args->lstio_ses_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_ses_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_ses_namep, - args->lstio_ses_nmlen)) { - LIBCFS_FREE(name, args->lstio_ses_nmlen + 1); - return -EFAULT; - } - - name[args->lstio_ses_nmlen] = 0; - - rc = lstcon_session_new(name, - args->lstio_ses_key, - args->lstio_ses_timeout, - args->lstio_ses_force, - args->lstio_ses_idp); - - LIBCFS_FREE(name, args->lstio_ses_nmlen + 1); - - return rc; -} - -int -lst_session_end_ioctl(lstio_session_end_args_t *args) -{ - if (args->lstio_ses_key != console_session.ses_key) - return -EACCES; - - return lstcon_session_end(); -} - -int -lst_session_info_ioctl(lstio_session_info_args_t *args) -{ - /* no checking of key */ - - if 
(args->lstio_ses_idp == NULL || /* address for ouput sid */ - args->lstio_ses_keyp == NULL || /* address for ouput key */ - args->lstio_ses_ndinfo == NULL || /* address for output ndinfo */ - args->lstio_ses_namep == NULL || /* address for ouput name */ - args->lstio_ses_nmlen <= 0 || - args->lstio_ses_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_session_info(args->lstio_ses_idp, - args->lstio_ses_keyp, - args->lstio_ses_ndinfo, - args->lstio_ses_namep, - args->lstio_ses_nmlen); -} - -int -lst_debug_ioctl(lstio_debug_args_t *args) -{ - char *name = NULL; - int client = 1; - int rc; - - if (args->lstio_dbg_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_dbg_resultp == NULL) - return -EINVAL; - - if (args->lstio_dbg_namep != NULL && /* name of batch/group */ - (args->lstio_dbg_nmlen <= 0 || - args->lstio_dbg_nmlen > LST_NAME_SIZE)) - return -EINVAL; - - if (args->lstio_dbg_namep != NULL) { - LIBCFS_ALLOC(name, args->lstio_dbg_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, args->lstio_dbg_namep, - args->lstio_dbg_nmlen)) { - LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1); - - return -EFAULT; - } - - name[args->lstio_dbg_nmlen] = 0; - } - - rc = -EINVAL; - - switch (args->lstio_dbg_type) { - case LST_OPC_SESSION: - rc = lstcon_session_debug(args->lstio_dbg_timeout, - args->lstio_dbg_resultp); - break; - - case LST_OPC_BATCHSRV: - client = 0; - case LST_OPC_BATCHCLI: - if (name == NULL) - goto out; - - rc = lstcon_batch_debug(args->lstio_dbg_timeout, - name, client, args->lstio_dbg_resultp); - break; - - case LST_OPC_GROUP: - if (name == NULL) - goto out; - - rc = lstcon_group_debug(args->lstio_dbg_timeout, - name, args->lstio_dbg_resultp); - break; - - case LST_OPC_NODES: - if (args->lstio_dbg_count <= 0 || - args->lstio_dbg_idsp == NULL) - goto out; - - rc = lstcon_nodes_debug(args->lstio_dbg_timeout, - args->lstio_dbg_count, - args->lstio_dbg_idsp, - args->lstio_dbg_resultp); - break; - - default: - 
break; - } - -out: - if (name != NULL) - LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1); - - return rc; -} - -int -lst_group_add_ioctl(lstio_group_add_args_t *args) -{ - char *name; - int rc; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_namep == NULL|| - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_grp_namep, - args->lstio_grp_nmlen)) { - LIBCFS_FREE(name, args->lstio_grp_nmlen); - return -EFAULT; - } - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_add(name); - - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - - return rc; -} - -int -lst_group_del_ioctl(lstio_group_del_args_t *args) -{ - int rc; - char *name; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_namep == NULL || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_grp_namep, - args->lstio_grp_nmlen)) { - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - return -EFAULT; - } - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_del(name); - - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - - return rc; -} - -int -lst_group_update_ioctl(lstio_group_update_args_t *args) -{ - int rc; - char *name; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_resultp == NULL || - args->lstio_grp_namep == NULL || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_grp_namep, - args->lstio_grp_nmlen)) { - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - 
return -EFAULT; - } - - name[args->lstio_grp_nmlen] = 0; - - switch (args->lstio_grp_opc) { - case LST_GROUP_CLEAN: - rc = lstcon_group_clean(name, args->lstio_grp_args); - break; - - case LST_GROUP_REFRESH: - rc = lstcon_group_refresh(name, args->lstio_grp_resultp); - break; - - case LST_GROUP_RMND: - if (args->lstio_grp_count <= 0 || - args->lstio_grp_idsp == NULL) { - rc = -EINVAL; - break; - } - rc = lstcon_nodes_remove(name, args->lstio_grp_count, - args->lstio_grp_idsp, - args->lstio_grp_resultp); - break; - - default: - rc = -EINVAL; - break; - } - - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - - return rc; -} - -int -lst_nodes_add_ioctl(lstio_group_nodes_args_t *args) -{ - int rc; - char *name; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_idsp == NULL || /* array of ids */ - args->lstio_grp_count <= 0 || - args->lstio_grp_resultp == NULL || - args->lstio_grp_namep == NULL || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) { - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - - return -EFAULT; - } - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_nodes_add(name, args->lstio_grp_count, - args->lstio_grp_idsp, - args->lstio_grp_resultp); - - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - - return rc; -} - -int -lst_group_list_ioctl(lstio_group_list_args_t *args) -{ - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_idx < 0 || - args->lstio_grp_namep == NULL || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_group_list(args->lstio_grp_idx, - args->lstio_grp_nmlen, - args->lstio_grp_namep); -} - -int -lst_group_info_ioctl(lstio_group_info_args_t *args) -{ - char *name; - int ndent; - int index; 
- int rc; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_namep == NULL || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (args->lstio_grp_entp == NULL && /* output: group entry */ - args->lstio_grp_dentsp == NULL) /* output: node entry */ - return -EINVAL; - - if (args->lstio_grp_dentsp != NULL) { /* have node entry */ - if (args->lstio_grp_idxp == NULL || /* node index */ - args->lstio_grp_ndentp == NULL) /* # of node entry */ - return -EINVAL; - - if (copy_from_user(&ndent, - args->lstio_grp_ndentp, sizeof(ndent)) || - copy_from_user(&index, args->lstio_grp_idxp, sizeof(index))) - return -EFAULT; - - if (ndent <= 0 || index < 0) - return -EINVAL; - } - - LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_grp_namep, - args->lstio_grp_nmlen)) { - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - return -EFAULT; - } - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_info(name, args->lstio_grp_entp, - &index, &ndent, args->lstio_grp_dentsp); - - LIBCFS_FREE(name, args->lstio_grp_nmlen + 1); - - if (rc != 0) - return rc; - - if (args->lstio_grp_dentsp != NULL && - (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) || - copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent)))) - rc = -EFAULT; - - return 0; -} - -int -lst_batch_add_ioctl(lstio_batch_add_args_t *args) -{ - int rc; - char *name; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_namep == NULL || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - return -EFAULT; - } - - name[args->lstio_bat_nmlen] = 0; - - rc = 
lstcon_batch_add(name); - - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - - return rc; -} - -int -lst_batch_run_ioctl(lstio_batch_run_args_t *args) -{ - int rc; - char *name; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_namep == NULL || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - return -EFAULT; - } - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_run(name, args->lstio_bat_timeout, - args->lstio_bat_resultp); - - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - - return rc; -} - -int -lst_batch_stop_ioctl(lstio_batch_stop_args_t *args) -{ - int rc; - char *name; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_resultp == NULL || - args->lstio_bat_namep == NULL || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - return -EFAULT; - } - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_stop(name, args->lstio_bat_force, - args->lstio_bat_resultp); - - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - - return rc; -} - -int -lst_batch_query_ioctl(lstio_batch_query_args_t *args) -{ - char *name; - int rc; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_resultp == NULL || - args->lstio_bat_namep == NULL || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (args->lstio_bat_testidx < 0) - return -EINVAL; - - LIBCFS_ALLOC(name, 
args->lstio_bat_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_bat_namep, - args->lstio_bat_nmlen)) { - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - return -EFAULT; - } - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_test_batch_query(name, - args->lstio_bat_testidx, - args->lstio_bat_client, - args->lstio_bat_timeout, - args->lstio_bat_resultp); - - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - - return rc; -} - -int -lst_batch_list_ioctl(lstio_batch_list_args_t *args) -{ - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_idx < 0 || - args->lstio_bat_namep == NULL || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_batch_list(args->lstio_bat_idx, - args->lstio_bat_nmlen, - args->lstio_bat_namep); -} - -int -lst_batch_info_ioctl(lstio_batch_info_args_t *args) -{ - char *name; - int rc; - int index; - int ndent; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_namep == NULL || /* batch name */ - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (args->lstio_bat_entp == NULL && /* output: batch entry */ - args->lstio_bat_dentsp == NULL) /* output: node entry */ - return -EINVAL; - - if (args->lstio_bat_dentsp != NULL) { /* have node entry */ - if (args->lstio_bat_idxp == NULL || /* node index */ - args->lstio_bat_ndentp == NULL) /* # of node entry */ - return -EINVAL; - - if (copy_from_user(&index, args->lstio_bat_idxp, sizeof(index)) || - copy_from_user(&ndent, args->lstio_bat_ndentp, sizeof(ndent))) - return -EFAULT; - - if (ndent <= 0 || index < 0) - return -EINVAL; - } - - LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, - args->lstio_bat_namep, args->lstio_bat_nmlen)) { - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - return -EFAULT; - } - - 
name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_info(name, - args->lstio_bat_entp, args->lstio_bat_server, - args->lstio_bat_testidx, &index, &ndent, - args->lstio_bat_dentsp); - - LIBCFS_FREE(name, args->lstio_bat_nmlen + 1); - - if (rc != 0) - return rc; - - if (args->lstio_bat_dentsp != NULL && - (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) || - copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent)))) - rc = -EFAULT; - - return rc; -} - -int -lst_stat_query_ioctl(lstio_stat_args_t *args) -{ - int rc; - char *name; - - /* TODO: not finished */ - if (args->lstio_sta_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_sta_resultp == NULL || - (args->lstio_sta_namep == NULL && - args->lstio_sta_idsp == NULL) || - args->lstio_sta_nmlen <= 0 || - args->lstio_sta_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (args->lstio_sta_idsp != NULL && - args->lstio_sta_count <= 0) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_sta_nmlen + 1); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, args->lstio_sta_namep, - args->lstio_sta_nmlen)) { - LIBCFS_FREE(name, args->lstio_sta_nmlen + 1); - return -EFAULT; - } - - if (args->lstio_sta_idsp == NULL) { - rc = lstcon_group_stat(name, args->lstio_sta_timeout, - args->lstio_sta_resultp); - } else { - rc = lstcon_nodes_stat(args->lstio_sta_count, - args->lstio_sta_idsp, - args->lstio_sta_timeout, - args->lstio_sta_resultp); - } - - LIBCFS_FREE(name, args->lstio_sta_nmlen + 1); - - return rc; -} - -int lst_test_add_ioctl(lstio_test_args_t *args) -{ - char *name; - char *srcgrp = NULL; - char *dstgrp = NULL; - void *param = NULL; - int ret = 0; - int rc = -ENOMEM; - - if (args->lstio_tes_resultp == NULL || - args->lstio_tes_retp == NULL || - args->lstio_tes_bat_name == NULL || /* no specified batch */ - args->lstio_tes_bat_nmlen <= 0 || - args->lstio_tes_bat_nmlen > LST_NAME_SIZE || - args->lstio_tes_sgrp_name == NULL || /* no source group */ - args->lstio_tes_sgrp_nmlen 
<= 0 || - args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE || - args->lstio_tes_dgrp_name == NULL || /* no target group */ - args->lstio_tes_dgrp_nmlen <= 0 || - args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - /* have parameter, check if parameter length is valid */ - if (args->lstio_tes_param != NULL && - (args->lstio_tes_param_len <= 0 || - args->lstio_tes_param_len > CFS_PAGE_SIZE - sizeof(lstcon_test_t))) - return -EINVAL; - - LIBCFS_ALLOC(name, args->lstio_tes_bat_nmlen + 1); - if (name == NULL) - return rc; - - LIBCFS_ALLOC(srcgrp, args->lstio_tes_sgrp_nmlen + 1); - if (srcgrp == NULL) - goto out; - - LIBCFS_ALLOC(dstgrp, args->lstio_tes_dgrp_nmlen + 1); - if (srcgrp == NULL) - goto out; - - if (args->lstio_tes_param != NULL) { - LIBCFS_ALLOC(param, args->lstio_tes_param_len); - if (param == NULL) - goto out; - } - - rc = -EFAULT; - if (copy_from_user(name, - args->lstio_tes_bat_name, - args->lstio_tes_bat_nmlen) || - copy_from_user(srcgrp, - args->lstio_tes_sgrp_name, - args->lstio_tes_sgrp_nmlen) || - copy_from_user(dstgrp, - args->lstio_tes_dgrp_name, - args->lstio_tes_dgrp_nmlen) || - copy_from_user(param, args->lstio_tes_param, - args->lstio_tes_param_len)) - goto out; - - rc = lstcon_test_add(name, - args->lstio_tes_type, - args->lstio_tes_loop, - args->lstio_tes_concur, - args->lstio_tes_dist, args->lstio_tes_span, - srcgrp, dstgrp, param, args->lstio_tes_param_len, - &ret, args->lstio_tes_resultp); - - if (ret != 0) - rc = (copy_to_user(args->lstio_tes_retp, &ret, sizeof(ret))) ? 
- -EFAULT : 0; -out: - if (name != NULL) - LIBCFS_FREE(name, args->lstio_tes_bat_nmlen + 1); - - if (srcgrp != NULL) - LIBCFS_FREE(srcgrp, args->lstio_tes_sgrp_nmlen + 1); - - if (dstgrp != NULL) - LIBCFS_FREE(dstgrp, args->lstio_tes_dgrp_nmlen + 1); - - if (param != NULL) - LIBCFS_FREE(param, args->lstio_tes_param_len); - - return rc; -} - -int -lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data) -{ - char *buf; - int opc = data->ioc_u32[0]; - int rc; - - if (cmd != IOC_LIBCFS_LNETST) - return -EINVAL; - - if (data->ioc_plen1 > CFS_PAGE_SIZE) - return -EINVAL; - - LIBCFS_ALLOC(buf, data->ioc_plen1); - if (buf == NULL) - return -ENOMEM; - - /* copy in parameter */ - if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { - LIBCFS_FREE(buf, data->ioc_plen1); - return -EFAULT; - } - - mutex_down(&console_session.ses_mutex); - - console_session.ses_laststamp = cfs_time_current_sec(); - - if (console_session.ses_shutdown) { - rc = -ESHUTDOWN; - goto out; - } - - if (console_session.ses_expired) - lstcon_session_end(); - - if (opc != LSTIO_SESSION_NEW && - console_session.ses_state == LST_SESSION_NONE) { - CDEBUG(D_NET, "LST no active session\n"); - rc = -ESRCH; - goto out; - } - - memset(&console_session.ses_trans_stat, 0, sizeof(lstcon_trans_stat_t)); - - switch (opc) { - case LSTIO_SESSION_NEW: - rc = lst_session_new_ioctl((lstio_session_new_args_t *)buf); - break; - case LSTIO_SESSION_END: - rc = lst_session_end_ioctl((lstio_session_end_args_t *)buf); - break; - case LSTIO_SESSION_INFO: - rc = lst_session_info_ioctl((lstio_session_info_args_t *)buf); - break; - case LSTIO_DEBUG: - rc = lst_debug_ioctl((lstio_debug_args_t *)buf); - break; - case LSTIO_GROUP_ADD: - rc = lst_group_add_ioctl((lstio_group_add_args_t *)buf); - break; - case LSTIO_GROUP_DEL: - rc = lst_group_del_ioctl((lstio_group_del_args_t *)buf); - break; - case LSTIO_GROUP_UPDATE: - rc = lst_group_update_ioctl((lstio_group_update_args_t *)buf); - break; - case LSTIO_NODES_ADD: - 
rc = lst_nodes_add_ioctl((lstio_group_nodes_args_t *)buf); - break; - case LSTIO_GROUP_LIST: - rc = lst_group_list_ioctl((lstio_group_list_args_t *)buf); - break; - case LSTIO_GROUP_INFO: - rc = lst_group_info_ioctl((lstio_group_info_args_t *)buf); - break; - case LSTIO_BATCH_ADD: - rc = lst_batch_add_ioctl((lstio_batch_add_args_t *)buf); - break; - case LSTIO_BATCH_START: - rc = lst_batch_run_ioctl((lstio_batch_run_args_t *)buf); - break; - case LSTIO_BATCH_STOP: - rc = lst_batch_stop_ioctl((lstio_batch_stop_args_t *)buf); - break; - case LSTIO_BATCH_QUERY: - rc = lst_batch_query_ioctl((lstio_batch_query_args_t *)buf); - break; - case LSTIO_BATCH_LIST: - rc = lst_batch_list_ioctl((lstio_batch_list_args_t *)buf); - break; - case LSTIO_BATCH_INFO: - rc = lst_batch_info_ioctl((lstio_batch_info_args_t *)buf); - break; - case LSTIO_TEST_ADD: - rc = lst_test_add_ioctl((lstio_test_args_t *)buf); - break; - case LSTIO_STAT_QUERY: - rc = lst_stat_query_ioctl((lstio_stat_args_t *)buf); - break; - default: - rc = -EINVAL; - } - - if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat, - sizeof(lstcon_trans_stat_t))) - rc = -EFAULT; -out: - mutex_up(&console_session.ses_mutex); - - LIBCFS_FREE(buf, data->ioc_plen1); - - return rc; -} - -EXPORT_SYMBOL(lstcon_ioctl_entry); - -#endif diff --git a/lnet/selftest/conrpc.c b/lnet/selftest/conrpc.c deleted file mode 100644 index b47fbe1b23a4906f0a8ee55a0eae691fe34e2725..0000000000000000000000000000000000000000 --- a/lnet/selftest/conrpc.c +++ /dev/null @@ -1,1284 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * Console framework rpcs - */ -#ifdef __KERNEL__ - -#include <libcfs/libcfs.h> -#include <lnet/lib-lnet.h> -#include "timer.h" -#include "conrpc.h" -#include "console.h" - -void lstcon_rpc_stat_reply(int, srpc_msg_t *, - lstcon_node_t *, 
lstcon_trans_stat_t *); - -static void -lstcon_rpc_done(srpc_client_rpc_t *rpc) -{ - lstcon_rpc_t *crpc = (lstcon_rpc_t *)rpc->crpc_priv; - - LASSERT (crpc != NULL && rpc == crpc->crp_rpc); - LASSERT (crpc->crp_posted && !crpc->crp_finished); - - spin_lock(&rpc->crpc_lock); - - if (crpc->crp_trans == NULL) { - /* Orphan RPC is not in any transaction, - * I'm just a poor body and nobody loves me */ - spin_unlock(&rpc->crpc_lock); - - /* release it */ - lstcon_rpc_put(crpc); - return; - } - - /* not an orphan RPC */ - crpc->crp_finished = 1; - - if (crpc->crp_stamp == 0) { - /* not aborted */ - LASSERT (crpc->crp_status == 0); - - crpc->crp_stamp = cfs_time_current(); - crpc->crp_status = rpc->crpc_status; - } - - /* wakeup (transaction)thread if I'm the last RPC in the transaction */ - if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining)) - cfs_waitq_signal(&crpc->crp_trans->tas_waitq); - - spin_unlock(&rpc->crpc_lock); -} - -int -lstcon_rpc_init(lstcon_node_t *nd, int service, - int npg, int cached, lstcon_rpc_t *crpc) -{ - - crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service, - npg, npg * CFS_PAGE_SIZE, - lstcon_rpc_done, (void *)crpc); - if (crpc->crp_rpc == NULL) - return -ENOMEM; - - crpc->crp_trans = NULL; - crpc->crp_node = nd; - crpc->crp_posted = 0; - crpc->crp_finished = 0; - crpc->crp_unpacked = 0; - crpc->crp_status = 0; - crpc->crp_stamp = 0; - crpc->crp_static = !cached; - CFS_INIT_LIST_HEAD(&crpc->crp_link); - - atomic_inc(&console_session.ses_rpc_counter); - - return 0; -} - -int -lstcon_rpc_prep(lstcon_node_t *nd, int service, - int npg, lstcon_rpc_t **crpcpp) -{ - lstcon_rpc_t *crpc = NULL; - int rc; - - spin_lock(&console_session.ses_rpc_lock); - - if (!list_empty(&console_session.ses_rpc_freelist)) { - crpc = list_entry(console_session.ses_rpc_freelist.next, - lstcon_rpc_t, crp_link); - list_del_init(&crpc->crp_link); - } - - spin_unlock(&console_session.ses_rpc_lock); - - if (crpc == NULL) { - LIBCFS_ALLOC(crpc, sizeof(*crpc)); - if (crpc == 
NULL) - return -ENOMEM; - } - - rc = lstcon_rpc_init(nd, service, npg, 1, crpc); - if (rc == 0) { - *crpcpp = crpc; - return 0; - } - - LIBCFS_FREE(crpc, sizeof(*crpc)); - - return rc; -} - -void -lstcon_rpc_put(lstcon_rpc_t *crpc) -{ - srpc_bulk_t *bulk = &crpc->crp_rpc->crpc_bulk; - int i; - - LASSERT (list_empty(&crpc->crp_link)); - - for (i = 0; i < bulk->bk_niov; i++) { - if (bulk->bk_iovs[i].kiov_page == NULL) - continue; - - cfs_free_page(bulk->bk_iovs[i].kiov_page); - } - - srpc_client_rpc_decref(crpc->crp_rpc); - - if (crpc->crp_static) { - /* Static RPC, not allocated */ - memset(crpc, 0, sizeof(*crpc)); - crpc->crp_static = 1; - - } else { - spin_lock(&console_session.ses_rpc_lock); - - list_add(&crpc->crp_link, &console_session.ses_rpc_freelist); - - spin_unlock(&console_session.ses_rpc_lock); - } - - /* RPC is not alive now */ - atomic_dec(&console_session.ses_rpc_counter); -} - -void -lstcon_rpc_post(lstcon_rpc_t *crpc) -{ - lstcon_rpc_trans_t *trans = crpc->crp_trans; - - LASSERT (trans != NULL); - - atomic_inc(&trans->tas_remaining); - crpc->crp_posted = 1; - - sfw_post_rpc(crpc->crp_rpc); -} - -static char * -lstcon_rpc_trans_name(int transop) -{ - if (transop == LST_TRANS_SESNEW) - return "SESNEW"; - - if (transop == LST_TRANS_SESEND) - return "SESEND"; - - if (transop == LST_TRANS_SESQRY) - return "SESQRY"; - - if (transop == LST_TRANS_SESPING) - return "SESPING"; - - if (transop == LST_TRANS_TSBCLIADD) - return "TSBCLIADD"; - - if (transop == LST_TRANS_TSBSRVADD) - return "TSBSRVADD"; - - if (transop == LST_TRANS_TSBRUN) - return "TSBRUN"; - - if (transop == LST_TRANS_TSBSTOP) - return "TSBSTOP"; - - if (transop == LST_TRANS_TSBCLIQRY) - return "TSBCLIQRY"; - - if (transop == LST_TRANS_TSBSRVQRY) - return "TSBSRVQRY"; - - if (transop == LST_TRANS_STATQRY) - return "STATQRY"; - - return "Unknown"; -} - -int -lstcon_rpc_trans_prep(struct list_head *translist, - int transop, lstcon_rpc_trans_t **transpp) -{ - lstcon_rpc_trans_t *trans; - - if 
(translist != NULL) { - list_for_each_entry(trans, translist, tas_link) { - /* Can't enqueue two private transaction on - * the same object */ - if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE) - return -EPERM; - } - } - - /* create a trans group */ - LIBCFS_ALLOC(trans, sizeof(*trans)); - if (trans == NULL) - return -ENOMEM; - - trans->tas_opc = transop; - - if (translist == NULL) - CFS_INIT_LIST_HEAD(&trans->tas_olink); - else - list_add_tail(&trans->tas_olink, translist); - - list_add_tail(&trans->tas_link, &console_session.ses_trans_list); - - CFS_INIT_LIST_HEAD(&trans->tas_rpcs_list); - atomic_set(&trans->tas_remaining, 0); - cfs_waitq_init(&trans->tas_waitq); - - *transpp = trans; - - return 0; -} - -void -lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *crpc) -{ - list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list); - crpc->crp_trans = trans; -} - -void -lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error) -{ - srpc_client_rpc_t *rpc; - lstcon_rpc_t *crpc; - lstcon_node_t *nd; - - list_for_each_entry (crpc, &trans->tas_rpcs_list, crp_link) { - rpc = crpc->crp_rpc; - - spin_lock(&rpc->crpc_lock); - - if (!crpc->crp_posted || crpc->crp_stamp != 0) { - /* rpc done or aborted already */ - spin_unlock(&rpc->crpc_lock); - continue; - } - - crpc->crp_stamp = cfs_time_current(); - crpc->crp_status = error; - - spin_unlock(&rpc->crpc_lock); - - sfw_abort_rpc(rpc); - - if (error != ETIMEDOUT) - continue; - - nd = crpc->crp_node; - if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp)) - continue; - - nd->nd_stamp = crpc->crp_stamp; - nd->nd_state = LST_NODE_DOWN; - } -} - -static int -lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans) -{ - if (console_session.ses_shutdown && - !list_empty(&trans->tas_olink)) /* It's not an end session RPC */ - return 1; - - return (atomic_read(&trans->tas_remaining) == 0) ? 
1: 0; -} - -int -lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout) -{ - lstcon_rpc_t *crpc; - int rc; - - if (list_empty(&trans->tas_rpcs_list)) - return 0; - - if (timeout < LST_TRANS_MIN_TIMEOUT) - timeout = LST_TRANS_MIN_TIMEOUT; - - CDEBUG(D_NET, "Transaction %s started\n", - lstcon_rpc_trans_name(trans->tas_opc)); - - /* post all requests */ - list_for_each_entry (crpc, &trans->tas_rpcs_list, crp_link) { - LASSERT (!crpc->crp_posted); - - lstcon_rpc_post(crpc); - } - - mutex_up(&console_session.ses_mutex); - - rc = cfs_waitq_wait_event_interruptible_timeout(trans->tas_waitq, - lstcon_rpc_trans_check(trans), - timeout * HZ); - - rc = (rc > 0)? 0: ((rc < 0)? -EINTR: -ETIMEDOUT); - - mutex_down(&console_session.ses_mutex); - - if (console_session.ses_shutdown) - rc = -ESHUTDOWN; - - if (rc != 0) { - /* treat short timeout as canceled */ - if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2) - rc = -EINTR; - - lstcon_rpc_trans_abort(trans, rc); - } - - CDEBUG(D_NET, "Transaction %s stopped: %d\n", - lstcon_rpc_trans_name(trans->tas_opc), rc); - - lstcon_rpc_trans_stat(trans, lstcon_trans_stat()); - - return rc; -} - -int -lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp) -{ - lstcon_node_t *nd = crpc->crp_node; - srpc_client_rpc_t *rpc = crpc->crp_rpc; - srpc_generic_reply_t *rep; - - LASSERT (nd != NULL && rpc != NULL); - LASSERT (crpc->crp_stamp != 0); - - if (crpc->crp_status != 0) { - *msgpp = NULL; - return crpc->crp_status; - } - - *msgpp = &rpc->crpc_replymsg; - if (!crpc->crp_unpacked) { - sfw_unpack_message(*msgpp); - crpc->crp_unpacked = 1; - } - - if (cfs_time_after(nd->nd_stamp, crpc->crp_stamp)) - return 0; - - nd->nd_stamp = crpc->crp_stamp; - rep = &(*msgpp)->msg_body.reply; - - if (rep->sid.ses_nid == LNET_NID_ANY) - nd->nd_state = LST_NODE_UNKNOWN; - else if (lstcon_session_match(rep->sid)) - nd->nd_state = LST_NODE_ACTIVE; - else - nd->nd_state = LST_NODE_BUSY; - - return 0; -} - -void 
-lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat) -{ - lstcon_rpc_t *crpc; - srpc_client_rpc_t *rpc; - srpc_msg_t *rep; - int error; - - LASSERT (stat != NULL); - - memset(stat, 0, sizeof(*stat)); - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - lstcon_rpc_stat_total(stat, 1); - - rpc = crpc->crp_rpc; - - LASSERT (crpc->crp_stamp != 0); - - error = lstcon_rpc_get_reply(crpc, &rep); - if (error != 0) { - lstcon_rpc_stat_failure(stat, 1); - if (stat->trs_rpc_errno == 0) - stat->trs_rpc_errno = -error; - - continue; - } - - lstcon_rpc_stat_success(stat, 1); - - lstcon_rpc_stat_reply(trans->tas_opc, rep, - crpc->crp_node, stat); - } - - CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, " - "RPC error(%d), Framework error(%d)\n", - lstcon_rpc_trans_name(trans->tas_opc), - lstcon_rpc_stat_success(stat, 0), - lstcon_rpc_stat_failure(stat, 0), - lstcon_rpc_stat_total(stat, 0), - stat->trs_rpc_errno, stat->trs_fwk_errno); - - return; -} - -int -lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, - struct list_head *head_up, - lstcon_rpc_readent_func_t readent) -{ - struct list_head tmp; - struct list_head *next; - lstcon_rpc_ent_t *ent; - srpc_generic_reply_t *rep; - srpc_client_rpc_t *rpc; - lstcon_rpc_t *crpc; - srpc_msg_t *msg; - lstcon_node_t *nd; - cfs_duration_t dur; - struct timeval tv; - int error; - - LASSERT (head_up != NULL); - - next = head_up; - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - if (copy_from_user(&tmp, next, sizeof(struct list_head))) - return -EFAULT; - - if (tmp.next == head_up) - return 0; - - next = tmp.next; - - ent = list_entry(next, lstcon_rpc_ent_t, rpe_link); - - rpc = crpc->crp_rpc; - - LASSERT (crpc->crp_stamp != 0); - - error = lstcon_rpc_get_reply(crpc, &msg); - - nd = crpc->crp_node; - - dur = cfs_time_sub(crpc->crp_stamp, - console_session.ses_id.ses_stamp); - cfs_duration_usec(dur, &tv); - - if (copy_to_user(&ent->rpe_peer, - &nd->nd_id, 
sizeof(lnet_process_id_t)) || - copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) || - copy_to_user(&ent->rpe_state, - &nd->nd_state, sizeof(nd->nd_state)) || - copy_to_user(&ent->rpe_rpc_errno, &error, sizeof(error))) - return -EFAULT; - - if (error != 0) - continue; - - /* RPC is done */ - rep = (srpc_generic_reply_t *)&msg->msg_body.reply; - - if (copy_to_user(&ent->rpe_sid, - &rep->sid, sizeof(lst_sid_t)) || - copy_to_user(&ent->rpe_fwk_errno, - &rep->status, sizeof(rep->status))) - return -EFAULT; - - if (readent == NULL) - continue; - - if ((error = readent(trans->tas_opc, msg, ent)) != 0) - return error; - } - - return 0; -} - -void -lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) -{ - srpc_client_rpc_t *rpc; - lstcon_rpc_t *crpc; - lstcon_rpc_t *tmp; - int count = 0; - - list_for_each_entry_safe(crpc, tmp, - &trans->tas_rpcs_list, crp_link) { - rpc = crpc->crp_rpc; - - spin_lock(&rpc->crpc_lock); - - /* free it if not posted or finished already */ - if (!crpc->crp_posted || crpc->crp_finished) { - spin_unlock(&rpc->crpc_lock); - - list_del_init(&crpc->crp_link); - lstcon_rpc_put(crpc); - - continue; - } - - /* rpcs can be still not callbacked (even LNetMDUnlink is called) - * because huge timeout for inaccessible network, don't make - * user wait for them, just abandon them, they will be recycled - * in callback */ - - LASSERT (crpc->crp_status != 0); - - crpc->crp_node = NULL; - crpc->crp_trans = NULL; - list_del_init(&crpc->crp_link); - count ++; - - spin_unlock(&rpc->crpc_lock); - - atomic_dec(&trans->tas_remaining); - } - - LASSERT (atomic_read(&trans->tas_remaining) == 0); - - list_del(&trans->tas_link); - if (!list_empty(&trans->tas_olink)) - list_del(&trans->tas_olink); - - CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n", - lstcon_rpc_trans_name(trans->tas_opc), count); - - LIBCFS_FREE(trans, sizeof(*trans)); - - return; -} - -int -lstcon_sesrpc_prep(lstcon_node_t *nd, int transop, lstcon_rpc_t **crpc) -{ - srpc_mksn_reqst_t *msrq; - 
srpc_rmsn_reqst_t *rsrq; - int rc; - - switch (transop) { - case LST_TRANS_SESNEW: - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION, 0, crpc); - if (rc != 0) - return rc; - - msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst; - msrq->mksn_sid = console_session.ses_id; - msrq->mksn_force = console_session.ses_force; - strncpy(msrq->mksn_name, console_session.ses_name, - strlen(console_session.ses_name)); - break; - - case LST_TRANS_SESEND: - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION, 0, crpc); - if (rc != 0) - return rc; - - rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst; - rsrq->rmsn_sid = console_session.ses_id; - break; - - default: - LBUG(); - } - - return 0; -} - -int -lstcon_dbgrpc_prep(lstcon_node_t *nd, lstcon_rpc_t **crpc) -{ - srpc_debug_reqst_t *drq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, 0, crpc); - if (rc != 0) - return rc; - - drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - - drq->dbg_sid = console_session.ses_id; - drq->dbg_flags = 0; - - return rc; -} - -int -lstcon_batrpc_prep(lstcon_node_t *nd, int transop, - lstcon_tsb_hdr_t *tsb, lstcon_rpc_t **crpc) -{ - lstcon_batch_t *batch; - srpc_batch_reqst_t *brq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, 0, crpc); - if (rc != 0) - return rc; - - brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst; - - brq->bar_sid = console_session.ses_id; - brq->bar_bid = tsb->tsb_id; - brq->bar_testidx = tsb->tsb_index; - brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN : - (transop == LST_TRANS_TSBSTOP ? 
SRPC_BATCH_OPC_STOP: - SRPC_BATCH_OPC_QUERY); - - if (transop != LST_TRANS_TSBRUN && - transop != LST_TRANS_TSBSTOP) - return 0; - - LASSERT (tsb->tsb_index == 0); - - batch = (lstcon_batch_t *)tsb; - brq->bar_arg = batch->bat_arg; - - return 0; -} - -int -lstcon_statrpc_prep(lstcon_node_t *nd, lstcon_rpc_t **crpc) -{ - srpc_stat_reqst_t *srq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, 0, crpc); - if (rc != 0) - return rc; - - srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst; - - srq->str_sid = console_session.ses_id; - srq->str_type = 0; /* XXX remove it */ - - return 0; -} - -lnet_process_id_t * -lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov) -{ - lnet_process_id_t *pid; - int i; - - i = idx / (CFS_PAGE_SIZE / sizeof(lnet_process_id_t)); - - LASSERT (i < nkiov); - - pid = (lnet_process_id_t *)cfs_page_address(kiov[i].kiov_page); - - return &pid[idx % (CFS_PAGE_SIZE / sizeof(lnet_process_id_t))]; -} - -int -lstcon_dstnodes_prep(lstcon_group_t *grp, int idx, - int dist, int span, int nkiov, lnet_kiov_t *kiov) -{ - lnet_process_id_t *pid; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; - int start; - int end; - int i = 0; - - LASSERT (dist >= 1); - LASSERT (span >= 1); - LASSERT (grp->grp_nnode >= 1); - - if (span > grp->grp_nnode) - return -EINVAL; - - start = ((idx / dist) * span) % grp->grp_nnode; - end = ((idx / dist) * span + span - 1) % grp->grp_nnode; - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) { - nd = ndl->ndl_node; - if (i < start) { - i ++; - continue; - } - - if (i > (end >= start ? 
end: grp->grp_nnode)) - break; - - pid = lstcon_next_id((i - start), nkiov, kiov); - *pid = nd->nd_id; - i++; - } - - if (start <= end) /* done */ - return 0; - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) { - if (i > grp->grp_nnode + end) - break; - - nd = ndl->ndl_node; - pid = lstcon_next_id((i - start), nkiov, kiov); - *pid = nd->nd_id; - i++; - } - - return 0; -} - -int -lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req) -{ - test_ping_req_t *prq = &req->tsr_u.ping; - - prq->png_size = param->png_size; - prq->png_flags = param->png_flags; - /* TODO dest */ - return 0; -} - -int -lstcon_bulkrpc_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) -{ - test_bulk_req_t *brq = &req->tsr_u.bulk; - - brq->blk_opc = param->blk_opc; - brq->blk_npg = (param->blk_size + CFS_PAGE_SIZE - 1) / CFS_PAGE_SIZE; - brq->blk_flags = param->blk_flags; - - return 0; -} - -int -lstcon_testrpc_prep(lstcon_node_t *nd, int transop, - lstcon_test_t *test, lstcon_rpc_t **crpc) -{ - lstcon_group_t *sgrp = test->tes_src_grp; - lstcon_group_t *dgrp = test->tes_dst_grp; - srpc_test_reqst_t *trq; - srpc_bulk_t *bulk; - int i; - int n = 0; - int rc = 0; - - if (transop == LST_TRANS_TSBCLIADD) - n = sfw_id_pages(test->tes_span); - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, n, crpc); - if (rc != 0) - return rc; - - trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst; - - if (transop == LST_TRANS_TSBSRVADD) { - int ndist = (sgrp->grp_nnode + test->tes_dist - 1) / test->tes_dist; - int nspan = (dgrp->grp_nnode + test->tes_span - 1) / test->tes_span; - int nmax = (ndist + nspan - 1) / nspan; - - trq->tsr_ndest = 0; - trq->tsr_loop = nmax * test->tes_dist * test->tes_concur; - - } else { - bulk = &(*crpc)->crp_rpc->crpc_bulk; - - for (i = 0; i < n; i++) { - bulk->bk_iovs[i].kiov_offset = 0; - bulk->bk_iovs[i].kiov_len = CFS_PAGE_SIZE; - bulk->bk_iovs[i].kiov_page = cfs_alloc_page(CFS_ALLOC_STD); - - if (bulk->bk_iovs[i].kiov_page != NULL) - 
continue; - - lstcon_rpc_put(*crpc); - return -ENOMEM; - } - - bulk->bk_sink = 0; - - LASSERT (transop == LST_TRANS_TSBCLIADD); - - rc = lstcon_dstnodes_prep(test->tes_dst_grp, - test->tes_cliidx++, test->tes_dist, - test->tes_span, n, &bulk->bk_iovs[0]); - if (rc != 0) { - lstcon_rpc_put(*crpc); - return rc; - } - - trq->tsr_ndest = test->tes_span; - trq->tsr_loop = test->tes_loop; - } - - trq->tsr_sid = console_session.ses_id; - trq->tsr_bid = test->tes_hdr.tsb_id; - trq->tsr_concur = test->tes_concur; - trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 1 : 0; - trq->tsr_stop_onerr = test->tes_stop_onerr; - - switch (test->tes_type) { - case LST_TEST_PING: - trq->tsr_service = SRPC_SERVICE_PING; - rc = lstcon_pingrpc_prep((lst_test_ping_param_t *)&test->tes_param[0], trq); - break; - case LST_TEST_BULK: - trq->tsr_service = SRPC_SERVICE_BRW; - rc = lstcon_bulkrpc_prep((lst_test_bulk_param_t *)&test->tes_param[0], trq); - break; - default: - LBUG(); - break; - } - - return rc; -} - -void -lstcon_rpc_stat_reply(int transop, srpc_msg_t *msg, - lstcon_node_t *nd, lstcon_trans_stat_t *stat) -{ - srpc_mksn_reply_t *mksn_rep; - srpc_rmsn_reply_t *rmsn_rep; - srpc_debug_reply_t *dbg_rep; - srpc_batch_reply_t *bat_rep; - srpc_test_reply_t *test_rep; - srpc_stat_reply_t *stat_rep; - int errno = 0; - - switch (transop) { - case LST_TRANS_SESNEW: - mksn_rep = &msg->msg_body.mksn_reply; - - if (mksn_rep->mksn_status == 0) { - lstcon_sesop_stat_success(stat, 1); - /* session timeout on remote node */ - nd->nd_timeout = mksn_rep->mksn_timeout; - return; - } - - LASSERT (mksn_rep->mksn_status == EBUSY || - mksn_rep->mksn_status == EINVAL); - - lstcon_sesop_stat_failure(stat, 1); - errno = mksn_rep->mksn_status; - break; - - case LST_TRANS_SESEND: - rmsn_rep = &msg->msg_body.rmsn_reply; - /* ESRCH is not an error for end session */ - if (rmsn_rep->rmsn_status == 0 || - rmsn_rep->rmsn_status == ESRCH) { - lstcon_sesop_stat_success(stat, 1); - return; - } - - LASSERT 
(rmsn_rep->rmsn_status == EBUSY || - rmsn_rep->rmsn_status == EINVAL); - - lstcon_sesop_stat_failure(stat, 1); - errno = rmsn_rep->rmsn_status; - break; - - case LST_TRANS_SESQRY: - case LST_TRANS_SESPING: - dbg_rep = &msg->msg_body.dbg_reply; - - if (dbg_rep->dbg_status == ESRCH) { - lstcon_sesqry_stat_unknown(stat, 1); - return; - } - - LASSERT (dbg_rep->dbg_status == 0); - - if (lstcon_session_match(dbg_rep->dbg_sid)) - lstcon_sesqry_stat_active(stat, 1); - else - lstcon_sesqry_stat_busy(stat, 1); - return; - - case LST_TRANS_TSBRUN: - case LST_TRANS_TSBSTOP: - bat_rep = &msg->msg_body.bat_reply; - - if (bat_rep->bar_status == 0) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - if (bat_rep->bar_status == EPERM && - transop == LST_TRANS_TSBSTOP) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - lstcon_tsbop_stat_failure(stat, 1); - errno = bat_rep->bar_status; - break; - - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - bat_rep = &msg->msg_body.bat_reply; - - if (bat_rep->bar_active != 0) - lstcon_tsbqry_stat_run(stat, 1); - else - lstcon_tsbqry_stat_idle(stat, 1); - - if (bat_rep->bar_status == 0) - return; - - lstcon_tsbqry_stat_failure(stat, 1); - errno = bat_rep->bar_status; - break; - - case LST_TRANS_TSBCLIADD: - case LST_TRANS_TSBSRVADD: - test_rep = &msg->msg_body.tes_reply; - - if (test_rep->tsr_status == 0) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - lstcon_tsbop_stat_failure(stat, 1); - errno = test_rep->tsr_status; - break; - - case LST_TRANS_STATQRY: - stat_rep = &msg->msg_body.stat_reply; - - if (stat_rep->str_status == 0) { - lstcon_statqry_stat_success(stat, 1); - return; - } - - lstcon_statqry_stat_failure(stat, 1); - errno = stat_rep->str_status; - break; - - default: - LBUG(); - } - - if (stat->trs_fwk_errno == 0) - stat->trs_fwk_errno = errno; - - return; -} - -int -lstcon_rpc_trans_ndlist(struct list_head *ndlist, - struct list_head *translist, int transop, - void *arg, lstcon_rpc_cond_func_t condition, 
- lstcon_rpc_trans_t **transpp) -{ - lstcon_rpc_trans_t *trans; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; - lstcon_rpc_t *rpc; - int rc; - - /* Creating session RPG for list of nodes */ - - rc = lstcon_rpc_trans_prep(translist, transop, &trans); - if (rc != 0) { - CERROR("Can't create transaction %d: %d\n", transop, rc); - return rc; - } - - list_for_each_entry(ndl, ndlist, ndl_link) { - rc = condition == NULL ? 1 : - condition(transop, ndl->ndl_node, arg); - - if (rc == 0) - continue; - - if (rc < 0) { - CDEBUG(D_NET, "Condition error while creating RPC " - " for transaction %d: %d\n", transop, rc); - break; - } - - nd = ndl->ndl_node; - - switch (transop) { - case LST_TRANS_SESNEW: - case LST_TRANS_SESEND: - rc = lstcon_sesrpc_prep(nd, transop, &rpc); - break; - case LST_TRANS_SESQRY: - case LST_TRANS_SESPING: - rc = lstcon_dbgrpc_prep(nd, &rpc); - break; - case LST_TRANS_TSBCLIADD: - case LST_TRANS_TSBSRVADD: - rc = lstcon_testrpc_prep(nd, transop, - (lstcon_test_t *)arg, &rpc); - break; - case LST_TRANS_TSBRUN: - case LST_TRANS_TSBSTOP: - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - rc = lstcon_batrpc_prep(nd, transop, - (lstcon_tsb_hdr_t *)arg, &rpc); - break; - case LST_TRANS_STATQRY: - rc = lstcon_statrpc_prep(nd, &rpc); - break; - default: - rc = -EINVAL; - break; - } - - if (rc != 0) { - CERROR("Failed to create RPC for transaction %s: %d\n", - lstcon_rpc_trans_name(transop), rc); - break; - } - - lstcon_rpc_trans_addreq(trans, rpc); - } - - if (rc == 0) { - *transpp = trans; - return 0; - } - - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -void -lstcon_rpc_pinger(void *arg) -{ - stt_timer_t *ptimer = (stt_timer_t *)arg; - lstcon_rpc_trans_t *trans; - lstcon_rpc_t *crpc; - srpc_msg_t *rep; - srpc_debug_reqst_t *drq; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; - time_t intv; - int count = 0; - int rc; - - /* RPC pinger is a special case of transaction, - * it's called by timer at 8 seconds interval. 
- */ - mutex_down(&console_session.ses_mutex); - - if (console_session.ses_shutdown || console_session.ses_expired) { - mutex_up(&console_session.ses_mutex); - return; - } - - if (!console_session.ses_expired && - cfs_time_current_sec() - console_session.ses_laststamp > - console_session.ses_timeout) - console_session.ses_expired = 1; - - trans = console_session.ses_ping; - - LASSERT (trans != NULL); - - list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) { - nd = ndl->ndl_node; - - if (console_session.ses_expired) { - /* idle console, end session on all nodes */ - if (nd->nd_state != LST_NODE_ACTIVE) - continue; - - rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND, &crpc); - if (rc != 0) { - CERROR("Out of memory\n"); - break; - } - - lstcon_rpc_trans_addreq(trans, crpc); - lstcon_rpc_post(crpc); - - continue; - } - - crpc = &nd->nd_ping; - - if (crpc->crp_rpc != NULL) { - LASSERT (crpc->crp_trans == trans); - LASSERT (!list_empty(&crpc->crp_link)); - - spin_lock(&crpc->crp_rpc->crpc_lock); - - LASSERT (crpc->crp_posted); - - if (!crpc->crp_finished) { - /* in flight */ - spin_unlock(&crpc->crp_rpc->crpc_lock); - continue; - } - - spin_unlock(&crpc->crp_rpc->crpc_lock); - - lstcon_rpc_get_reply(crpc, &rep); - - list_del_init(&crpc->crp_link); - - lstcon_rpc_put(crpc); - } - - if (nd->nd_state != LST_NODE_ACTIVE) - continue; - - intv = cfs_duration_sec(cfs_time_sub(cfs_time_current(), - nd->nd_stamp)); - if (intv < nd->nd_timeout / 2) - continue; - - rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG, 0, 0, crpc); - if (rc != 0) { - CERROR("Out of memory\n"); - break; - } - - drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - - drq->dbg_sid = console_session.ses_id; - drq->dbg_flags = 0; - - lstcon_rpc_trans_addreq(trans, crpc); - lstcon_rpc_post(crpc); - - count ++; - } - - if (console_session.ses_expired) { - mutex_up(&console_session.ses_mutex); - return; - } - - CDEBUG(D_NET, "Ping %d nodes in session\n", count); - - ptimer->stt_expires = 
cfs_time_current_sec() + LST_PING_INTERVAL; - stt_add_timer(ptimer); - - mutex_up(&console_session.ses_mutex); -} - -int -lstcon_rpc_pinger_start(void) -{ - stt_timer_t *ptimer; - int rc; - - LASSERT (list_empty(&console_session.ses_rpc_freelist)); - LASSERT (atomic_read(&console_session.ses_rpc_counter) == 0); - - rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING, - &console_session.ses_ping); - if (rc != 0) { - CERROR("Failed to create console pinger\n"); - return rc; - } - - ptimer = &console_session.ses_ping_timer; - ptimer->stt_expires = cfs_time_current_sec() + LST_PING_INTERVAL; - - stt_add_timer(ptimer); - - return 0; -} - -void -lstcon_rpc_pinger_stop(void) -{ - LASSERT (console_session.ses_shutdown); - - stt_del_timer(&console_session.ses_ping_timer); - - lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN); - lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat()); - lstcon_rpc_trans_destroy(console_session.ses_ping); - - memset(lstcon_trans_stat(), 0, sizeof(lstcon_trans_stat_t)); - - console_session.ses_ping = NULL; -} - -void -lstcon_rpc_cleanup_wait(void) -{ - lstcon_rpc_trans_t *trans; - lstcon_rpc_t *crpc; - struct list_head *pacer; - struct list_head zlist; - - /* Called with hold of global mutex */ - - LASSERT (console_session.ses_shutdown); - - while (!list_empty(&console_session.ses_trans_list)) { - list_for_each(pacer, &console_session.ses_trans_list) { - trans = list_entry(pacer, lstcon_rpc_trans_t, tas_link); - - CDEBUG(D_NET, "Session closed, wakeup transaction %s\n", - lstcon_rpc_trans_name(trans->tas_opc)); - - cfs_waitq_signal(&trans->tas_waitq); - } - - mutex_up(&console_session.ses_mutex); - - CWARN("Session is shutting down, " - "waiting for termination of transactions\n"); - cfs_pause(cfs_time_seconds(1)); - - mutex_down(&console_session.ses_mutex); - } - - spin_lock(&console_session.ses_rpc_lock); - - lst_wait_until((atomic_read(&console_session.ses_rpc_counter) == 0), - console_session.ses_rpc_lock, - "Network 
is not accessable or target is down, " - "waiting for %d console RPCs to being recycled\n", - atomic_read(&console_session.ses_rpc_counter)); - - list_add(&zlist, &console_session.ses_rpc_freelist); - list_del_init(&console_session.ses_rpc_freelist); - - spin_unlock(&console_session.ses_rpc_lock); - - while (!list_empty(&zlist)) { - crpc = list_entry(zlist.next, lstcon_rpc_t, crp_link); - - list_del(&crpc->crp_link); - LIBCFS_FREE(crpc, sizeof(lstcon_rpc_t)); - } -} - -int -lstcon_rpc_module_init(void) -{ - CFS_INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list); - console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger; - console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer; - - console_session.ses_ping = NULL; - - spin_lock_init(&console_session.ses_rpc_lock); - atomic_set(&console_session.ses_rpc_counter, 0); - CFS_INIT_LIST_HEAD(&console_session.ses_rpc_freelist); - - return 0; -} - -void -lstcon_rpc_module_fini(void) -{ - LASSERT (list_empty(&console_session.ses_rpc_freelist)); - LASSERT (atomic_read(&console_session.ses_rpc_counter) == 0); -} - -#endif diff --git a/lnet/selftest/conrpc.h b/lnet/selftest/conrpc.h deleted file mode 100644 index cd575116e2d21ae5f750b69ec1cd46766bf5d67a..0000000000000000000000000000000000000000 --- a/lnet/selftest/conrpc.h +++ /dev/null @@ -1,106 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * Console rpc - */ - -#ifndef __LST_CONRPC_H__ -#define __LST_CONRPC_H__ - -#ifdef __KERNEL__ -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-types.h> -#include <lnet/lnetst.h> -#include "rpc.h" -#include "selftest.h" - -/* Console rpc and rpc transaction */ -#define LST_TRANS_TIMEOUT 30 -#define LST_TRANS_MIN_TIMEOUT 3 -#define LST_PING_INTERVAL 8 - -struct lstcon_rpc_trans; -struct lstcon_tsb_hdr; -struct 
lstcon_test; -struct lstcon_node; - -typedef struct lstcon_rpc { - struct list_head crp_link; /* chain on rpc transaction */ - srpc_client_rpc_t *crp_rpc; /* client rpc */ - struct lstcon_node *crp_node; /* destination node */ - struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */ - - int crp_posted:1; /* rpc is posted */ - int crp_finished:1; /* rpc is finished */ - int crp_unpacked:1; /* reply is unpacked */ - int crp_static:1; /* not from RPC buffer */ - int crp_status; /* console rpc errors */ - cfs_time_t crp_stamp; /* replied time stamp */ -} lstcon_rpc_t; - -typedef struct lstcon_rpc_trans { - struct list_head tas_olink; /* link chain on owner list */ - struct list_head tas_link; /* link chain on global list */ - int tas_opc; /* operation code of transaction */ - cfs_waitq_t tas_waitq; /* wait queue head */ - atomic_t tas_remaining; /* # of un-scheduled rpcs */ - struct list_head tas_rpcs_list; /* queued requests */ -} lstcon_rpc_trans_t; - -#define LST_TRANS_PRIVATE 0x1000 - -#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01) -#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02) -#define LST_TRANS_SESQRY 0x03 -#define LST_TRANS_SESPING 0x04 - -#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11) -#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12) -#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13) -#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14) -#define LST_TRANS_TSBCLIQRY 0x15 -#define LST_TRANS_TSBSRVQRY 0x16 - -#define LST_TRANS_STATQRY 0x21 - -typedef int (* lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *); -typedef int (* lstcon_rpc_readent_func_t)(int, srpc_msg_t *, lstcon_rpc_ent_t *); - -int lstcon_sesrpc_prep(struct lstcon_node *nd, - int transop, lstcon_rpc_t **crpc); -int lstcon_dbgrpc_prep(struct lstcon_node *nd, lstcon_rpc_t **crpc); -int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, - struct lstcon_tsb_hdr *tsb, lstcon_rpc_t **crpc); -int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, - struct 
lstcon_test *test, lstcon_rpc_t **crpc); -int lstcon_statrpc_prep(struct lstcon_node *nd, lstcon_rpc_t **crpc); -void lstcon_rpc_put(lstcon_rpc_t *crpc); -int lstcon_rpc_trans_prep(struct list_head *translist, - int transop, lstcon_rpc_trans_t **transpp); -int lstcon_rpc_trans_ndlist(struct list_head *ndlist, - struct list_head *translist, int transop, - void *arg, lstcon_rpc_cond_func_t condition, - lstcon_rpc_trans_t **transpp); -void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, - lstcon_trans_stat_t *stat); -int lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, - struct list_head *head_up, - lstcon_rpc_readent_func_t readent); -void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error); -void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans); -void lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *req); -int lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout); -int lstcon_rpc_pinger_start(void); -void lstcon_rpc_pinger_stop(void); -void lstcon_rpc_cleanup_wait(void); -int lstcon_rpc_module_init(void); -void lstcon_rpc_module_fini(void); - -#endif - -#endif diff --git a/lnet/selftest/console.c b/lnet/selftest/console.c deleted file mode 100644 index 805091b61b9b4ba5de6cb38f56082488af13f93c..0000000000000000000000000000000000000000 --- a/lnet/selftest/console.c +++ /dev/null @@ -1,1985 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * Infrastructure of LST console - */ -#ifdef __KERNEL__ - -#include <libcfs/libcfs.h> -#include <lnet/lib-lnet.h> -#include "console.h" -#include "conrpc.h" - -#define LST_NODE_STATE_COUNTER(nd, p) \ -do { \ - if ((nd)->nd_state == LST_NODE_ACTIVE) \ - (p)->nle_nactive ++; \ - else if ((nd)->nd_state == LST_NODE_BUSY) \ - (p)->nle_nbusy ++; \ - else if ((nd)->nd_state == LST_NODE_DOWN) \ - (p)->nle_ndown ++; 
\ - else \ - (p)->nle_nunknown ++; \ - (p)->nle_nnode ++; \ -} while (0) - -lstcon_session_t console_session; - -void -lstcon_node_get(lstcon_node_t *nd) -{ - LASSERT (nd->nd_ref >= 1); - - nd->nd_ref++; -} - -static int -lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create) -{ - lstcon_ndlink_t *ndl; - unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE; - - LASSERT (id.nid != LNET_NID_ANY); - - list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], ndl_hlink) { - if (ndl->ndl_node->nd_id.nid != id.nid || - ndl->ndl_node->nd_id.pid != id.pid) - continue; - - lstcon_node_get(ndl->ndl_node); - *ndpp = ndl->ndl_node; - return 0; - } - - if (!create) - return -ENOENT; - - LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t)); - if (*ndpp == NULL) - return -ENOMEM; - - ndl = (lstcon_ndlink_t *)(*ndpp + 1); - - ndl->ndl_node = *ndpp; - - ndl->ndl_node->nd_ref = 1; - ndl->ndl_node->nd_id = id; - ndl->ndl_node->nd_stamp = cfs_time_current(); - ndl->ndl_node->nd_state = LST_NODE_UNKNOWN; - ndl->ndl_node->nd_timeout = 0; - memset(&ndl->ndl_node->nd_ping, 0, sizeof(lstcon_rpc_t)); - - /* queued in global hash & list, no refcount is taken by - * global hash & list, if caller release his refcount, - * node will be released */ - list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]); - list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list); - - return 0; -} - -void -lstcon_node_put(lstcon_node_t *nd) -{ - lstcon_ndlink_t *ndl; - - LASSERT (nd->nd_ref > 0); - - if (--nd->nd_ref > 0) - return; - - ndl = (lstcon_ndlink_t *)(nd + 1); - - LASSERT (!list_empty(&ndl->ndl_link)); - LASSERT (!list_empty(&ndl->ndl_hlink)); - - /* remove from session */ - list_del(&ndl->ndl_link); - list_del(&ndl->ndl_hlink); - - LIBCFS_FREE(nd, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t)); -} - -static int -lstcon_ndlink_find(struct list_head *hash, - lnet_process_id_t id, lstcon_ndlink_t **ndlpp, int create) -{ - unsigned int idx = 
LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; - int rc; - - if (id.nid == LNET_NID_ANY) - return -EINVAL; - - /* search in hash */ - list_for_each_entry(ndl, &hash[idx], ndl_hlink) { - if (ndl->ndl_node->nd_id.nid != id.nid || - ndl->ndl_node->nd_id.pid != id.pid) - continue; - - *ndlpp = ndl; - return 0; - } - - if (create == 0) - return -ENOENT; - - /* find or create in session hash */ - rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0); - if (rc != 0) - return rc; - - LIBCFS_ALLOC(ndl, sizeof(lstcon_ndlink_t)); - if (ndl == NULL) { - lstcon_node_put(nd); - return -ENOMEM; - } - - *ndlpp = ndl; - - ndl->ndl_node = nd; - CFS_INIT_LIST_HEAD(&ndl->ndl_link); - list_add_tail(&ndl->ndl_hlink, &hash[idx]); - - return 0; -} - -static void -lstcon_ndlink_release(lstcon_ndlink_t *ndl) -{ - LASSERT (list_empty(&ndl->ndl_link)); - LASSERT (!list_empty(&ndl->ndl_hlink)); - - list_del(&ndl->ndl_hlink); /* delete from hash */ - lstcon_node_put(ndl->ndl_node); - - LIBCFS_FREE(ndl, sizeof(*ndl)); -} - -static int -lstcon_group_alloc(char *name, lstcon_group_t **grpp) -{ - lstcon_group_t *grp; - int i; - - LIBCFS_ALLOC(grp, offsetof(lstcon_group_t, - grp_ndl_hash[LST_NODE_HASHSIZE])); - if (grp == NULL) - return -ENOMEM; - - memset(grp, 0, offsetof(lstcon_group_t, - grp_ndl_hash[LST_NODE_HASHSIZE])); - - grp->grp_ref = 1; - if (name != NULL) - strcpy(grp->grp_name, name); - - CFS_INIT_LIST_HEAD(&grp->grp_link); - CFS_INIT_LIST_HEAD(&grp->grp_ndl_list); - CFS_INIT_LIST_HEAD(&grp->grp_trans_list); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) - CFS_INIT_LIST_HEAD(&grp->grp_ndl_hash[i]); - - *grpp = grp; - - return 0; -} - -static void -lstcon_group_addref(lstcon_group_t *grp) -{ - grp->grp_ref ++; -} - -static void lstcon_group_ndlink_release(lstcon_group_t *, lstcon_ndlink_t *); - -static void -lstcon_group_drain(lstcon_group_t *grp, int keep) -{ - lstcon_ndlink_t *ndl; - lstcon_ndlink_t *tmp; - - list_for_each_entry_safe(ndl, tmp, 
&grp->grp_ndl_list, ndl_link) { - if ((ndl->ndl_node->nd_state & keep) == 0) - lstcon_group_ndlink_release(grp, ndl); - } -} - -static void -lstcon_group_decref(lstcon_group_t *grp) -{ - int i; - - if (--grp->grp_ref > 0) - return; - - if (!list_empty(&grp->grp_link)) - list_del(&grp->grp_link); - - lstcon_group_drain(grp, 0); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - LASSERT (list_empty(&grp->grp_ndl_hash[i])); - } - - LIBCFS_FREE(grp, offsetof(lstcon_group_t, - grp_ndl_hash[LST_NODE_HASHSIZE])); -} - -static int -lstcon_group_find(char *name, lstcon_group_t **grpp) -{ - lstcon_group_t *grp; - - list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (strncmp(grp->grp_name, name, LST_NAME_SIZE) != 0) - continue; - - lstcon_group_addref(grp); /* +1 ref for caller */ - *grpp = grp; - return 0; - } - - return -ENOENT; -} - -static void -lstcon_group_put(lstcon_group_t *grp) -{ - lstcon_group_decref(grp); -} - -static int -lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id, - lstcon_ndlink_t **ndlpp, int create) -{ - int rc; - - rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create); - if (rc != 0) - return rc; - - if (!list_empty(&(*ndlpp)->ndl_link)) - return 0; - - list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list); - grp->grp_nnode ++; - - return 0; -} - -static void -lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl) -{ - list_del_init(&ndl->ndl_link); - lstcon_ndlink_release(ndl); - grp->grp_nnode --; -} - -static void -lstcon_group_ndlink_move(lstcon_group_t *old, - lstcon_group_t *new, lstcon_ndlink_t *ndl) -{ - unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) % - LST_NODE_HASHSIZE; - - list_del(&ndl->ndl_hlink); - list_del(&ndl->ndl_link); - old->grp_nnode --; - - list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]); - list_add_tail(&ndl->ndl_link, &new->grp_ndl_list); - new->grp_nnode ++; - - return; -} - -static void -lstcon_group_move(lstcon_group_t *old, lstcon_group_t 
*new) -{ - lstcon_ndlink_t *ndl; - - while (!list_empty(&old->grp_ndl_list)) { - ndl = list_entry(old->grp_ndl_list.next, - lstcon_ndlink_t, ndl_link); - lstcon_group_ndlink_move(old, new, ndl); - } -} - -int -lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg) -{ - lstcon_group_t *grp = (lstcon_group_t *)arg; - - switch (transop) { - case LST_TRANS_SESNEW: - if (nd->nd_state == LST_NODE_ACTIVE) - return 0; - break; - - case LST_TRANS_SESEND: - if (nd->nd_state != LST_NODE_ACTIVE) - return 0; - - if (grp != NULL && nd->nd_ref > 1) - return 0; - break; - - case LST_TRANS_SESQRY: - break; - - default: - LBUG(); - } - - return 1; -} - -int -lstcon_sesrpc_readent(int transop, srpc_msg_t *msg, - lstcon_rpc_ent_t *ent_up) -{ - srpc_debug_reply_t *rep; - - switch (transop) { - case LST_TRANS_SESNEW: - case LST_TRANS_SESEND: - return 0; - - case LST_TRANS_SESQRY: - rep = &msg->msg_body.dbg_reply; - - if (copy_to_user(&ent_up->rpe_priv[0], - &rep->dbg_timeout, sizeof(int)) || - copy_to_user(&ent_up->rpe_payload[0], - &rep->dbg_name, LST_NAME_SIZE)) - return -EFAULT; - - return 0; - - default: - LBUG(); - } - - return 0; -} - -static int -lstcon_group_nodes_add(lstcon_group_t *grp, int count, - lnet_process_id_t *ids_up, struct list_head *result_up) -{ - lstcon_rpc_trans_t *trans; - lstcon_ndlink_t *ndl; - lstcon_group_t *tmp; - lnet_process_id_t id; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc != 0) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0 ; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* skip if it's in this group already */ - rc = lstcon_group_ndlink_find(grp, id, &ndl, 0); - if (rc == 0) - continue; - - /* add to tmp group */ - rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1); - if (rc != 0) { - CERROR("Can't create ndlink, out of memory\n"); - break; - } - } - - if (rc != 0) { - lstcon_group_put(tmp); - return rc; - } - - rc = 
lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, - &tmp->grp_trans_list, LST_TRANS_SESNEW, - tmp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - lstcon_group_put(tmp); - return rc; - } - - /* post all RPCs */ - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_sesrpc_readent); - /* destroy all RPGs */ - lstcon_rpc_trans_destroy(trans); - - lstcon_group_move(tmp, grp); - lstcon_group_put(tmp); - - return rc; -} - -static int -lstcon_group_nodes_remove(lstcon_group_t *grp, - int count, lnet_process_id_t *ids_up, - struct list_head *result_up) -{ - lstcon_rpc_trans_t *trans; - lstcon_ndlink_t *ndl; - lstcon_group_t *tmp; - lnet_process_id_t id; - int rc; - int i; - - /* End session and remove node from the group */ - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc != 0) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - goto error; - } - - /* move node to tmp group */ - if (lstcon_group_ndlink_find(grp, id, &ndl, 0) == 0) - lstcon_group_ndlink_move(grp, tmp, ndl); - } - - rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, - &tmp->grp_trans_list, LST_TRANS_SESEND, - tmp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - goto error; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* release nodes anyway, because we can't rollback status */ - lstcon_group_put(tmp); - - return rc; -error: - lstcon_group_move(tmp, grp); - lstcon_group_put(tmp); - - return rc; -} - -int -lstcon_group_add(char *name) -{ - lstcon_group_t *grp; - int rc; - - rc = (lstcon_group_find(name, &grp) == 0)? 
-EEXIST: 0; - if (rc != 0) { - /* find a group with same name */ - lstcon_group_put(grp); - return rc; - } - - rc = lstcon_group_alloc(name, &grp); - if (rc != 0) { - CERROR("Can't allocate descriptor for group %s\n", name); - return -ENOMEM; - } - - list_add_tail(&grp->grp_link, &console_session.ses_grp_list); - - return rc; -} - -int -lstcon_nodes_add(char *name, int count, - lnet_process_id_t *ids_up, struct list_head *result_up) -{ - lstcon_group_t *grp; - int rc; - - LASSERT (count > 0); - LASSERT (ids_up != NULL); - - rc = lstcon_group_find(name, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by other threads or test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_put(grp); - - return -EBUSY; - } - - rc = lstcon_group_nodes_add(grp, count, ids_up, result_up); - - lstcon_group_put(grp); - - return rc; -} - -int -lstcon_group_del(char *name) -{ - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by others threads or test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_put(grp); - return -EBUSY; - } - - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &grp->grp_trans_list, LST_TRANS_SESEND, - grp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - lstcon_group_put(grp); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - lstcon_rpc_trans_destroy(trans); - - lstcon_group_put(grp); - /* -ref for session, it's destroyed, - * status can't be rolled back, destroy group anway */ - lstcon_group_put(grp); - - return rc; -} - -int -lstcon_group_clean(char *name, int args) -{ - lstcon_group_t *grp = NULL; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", 
name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_put(grp); - return -EBUSY; - } - - args = (LST_NODE_ACTIVE | LST_NODE_BUSY | - LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args; - - lstcon_group_drain(grp, args); - - lstcon_group_put(grp); - /* release empty group */ - if (list_empty(&grp->grp_ndl_list)) - lstcon_group_put(grp); - - return 0; -} - -int -lstcon_nodes_remove(char *name, int count, - lnet_process_id_t *ids_up, struct list_head *result_up) -{ - lstcon_group_t *grp = NULL; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_put(grp); - return -EBUSY; - } - - rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up); - - lstcon_group_put(grp); - /* release empty group */ - if (list_empty(&grp->grp_ndl_list)) - lstcon_group_put(grp); - - return rc; -} - -int -lstcon_group_refresh(char *name, struct list_head *result_up) -{ - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_put(grp); - return -EBUSY; - } - - /* re-invite all inactive nodes int the group */ - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &grp->grp_trans_list, LST_TRANS_SESNEW, - grp, lstcon_sesrpc_condition, &trans); - if (rc != 0) { - /* local error, return */ - CDEBUG(D_NET, "Can't create transaction: %d\n", rc); - lstcon_group_put(grp); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* -ref for me */ - lstcon_group_put(grp); - - return rc; 
-} - -int -lstcon_group_list(int index, int len, char *name_up) -{ - lstcon_group_t *grp; - - LASSERT (index >= 0); - LASSERT (name_up != NULL); - - list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (index-- == 0) { - return copy_to_user(name_up, grp->grp_name, len) ? - -EFAULT : 0; - } - } - - return -ENOENT; -} - -static int -lstcon_nodes_getent(struct list_head *head, int *index_p, - int *count_p, lstcon_node_ent_t *dents_up) -{ - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; - int count = 0; - int index = 0; - - LASSERT (index_p != NULL && count_p != NULL); - LASSERT (dents_up != NULL); - LASSERT (*index_p >= 0); - LASSERT (*count_p > 0); - - list_for_each_entry(ndl, head, ndl_link) { - if (index++ < *index_p) - continue; - - if (count >= *count_p) - break; - - nd = ndl->ndl_node; - if (copy_to_user(&dents_up[count].nde_id, - &nd->nd_id, sizeof(nd->nd_id)) || - copy_to_user(&dents_up[count].nde_state, - &nd->nd_state, sizeof(nd->nd_state))) - return -EFAULT; - - count ++; - } - - if (index <= *index_p) - return -ENOENT; - - *count_p = count; - *index_p = index; - - return 0; -} - -int -lstcon_group_info(char *name, lstcon_ndlist_ent_t *gents_p, - int *index_p, int *count_p, lstcon_node_ent_t *dents_up) -{ - lstcon_ndlist_ent_t *gentp; - lstcon_group_t *grp; - lstcon_ndlink_t *ndl; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - } - - if (dents_up != 0) { - /* verbose query */ - rc = lstcon_nodes_getent(&grp->grp_ndl_list, - index_p, count_p, dents_up); - lstcon_group_put(grp); - - return rc; - } - - /* non-verbose query */ - LIBCFS_ALLOC(gentp, sizeof(lstcon_ndlist_ent_t)); - if (gentp == NULL) { - CERROR("Can't allocate ndlist_ent\n"); - lstcon_group_put(grp); - - return -ENOMEM; - } - - memset(gentp, 0, sizeof(lstcon_ndlist_ent_t)); - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp); - - rc = 
copy_to_user(gents_p, gentp, - sizeof(lstcon_ndlist_ent_t)) ? -EFAULT: 0; - - LIBCFS_FREE(gentp, sizeof(lstcon_ndlist_ent_t)); - - lstcon_group_put(grp); - - return 0; -} - -int -lstcon_batch_find(char *name, lstcon_batch_t **batpp) -{ - lstcon_batch_t *bat; - - list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (strncmp(bat->bat_name, name, LST_NAME_SIZE) == 0) { - *batpp = bat; - return 0; - } - } - - return -ENOENT; -} - -int -lstcon_batch_add(char *name) -{ - lstcon_batch_t *bat; - int i; - int rc; - - rc = (lstcon_batch_find(name, &bat) == 0)? -EEXIST: 0; - if (rc != 0) { - CDEBUG(D_NET, "Batch %s already exists\n", name); - return rc; - } - - LIBCFS_ALLOC(bat, sizeof(lstcon_batch_t)); - if (bat == NULL) { - CERROR("Can't allocate descriptor for batch %s\n", name); - return -ENOMEM; - } - - LIBCFS_ALLOC(bat->bat_cli_hash, - sizeof(struct list_head) * LST_NODE_HASHSIZE); - if (bat->bat_cli_hash == NULL) { - CERROR("Can't allocate hash for batch %s\n", name); - LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); - - return -ENOMEM; - } - - LIBCFS_ALLOC(bat->bat_srv_hash, - sizeof(struct list_head) * LST_NODE_HASHSIZE); - if (bat->bat_srv_hash == NULL) { - CERROR("Can't allocate hash for batch %s\n", name); - LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE); - LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); - - return -ENOMEM; - } - - strcpy(bat->bat_name, name); - bat->bat_hdr.tsb_index = 0; - bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie; - - bat->bat_ntest = 0; - bat->bat_state = LST_BATCH_IDLE; - - CFS_INIT_LIST_HEAD(&bat->bat_cli_list); - CFS_INIT_LIST_HEAD(&bat->bat_srv_list); - CFS_INIT_LIST_HEAD(&bat->bat_test_list); - CFS_INIT_LIST_HEAD(&bat->bat_trans_list); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - CFS_INIT_LIST_HEAD(&bat->bat_cli_hash[i]); - CFS_INIT_LIST_HEAD(&bat->bat_srv_hash[i]); - } - - list_add_tail(&bat->bat_link, &console_session.ses_bat_list); - - return rc; -} - -int -lstcon_batch_list(int index, int len, char 
*name_up) -{ - lstcon_batch_t *bat; - - LASSERT (name_up != NULL); - LASSERT (index >= 0); - - list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (index-- == 0) { - return copy_to_user(name_up,bat->bat_name, len) ? - -EFAULT: 0; - } - } - - return -ENOENT; -} - -int -lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, int server, - int testidx, int *index_p, int *ndent_p, - lstcon_node_ent_t *dents_up) -{ - lstcon_test_batch_ent_t *entp; - struct list_head *clilst; - struct list_head *srvlst; - lstcon_test_t *test = NULL; - lstcon_batch_t *bat; - lstcon_ndlink_t *ndl; - int rc; - - rc = lstcon_batch_find(name, &bat); - if (rc != 0) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - if (testidx > 0) { - /* query test, test index start from 1 */ - list_for_each_entry(test, &bat->bat_test_list, tes_link) { - if (testidx-- == 1) - break; - } - - if (testidx > 0) { - CDEBUG(D_NET, "Can't find specified test in batch\n"); - return -ENOENT; - } - } - - clilst = (test == NULL) ? &bat->bat_cli_list : - &test->tes_src_grp->grp_ndl_list; - srvlst = (test == NULL) ? &bat->bat_srv_list : - &test->tes_dst_grp->grp_ndl_list; - - if (dents_up != NULL) { - rc = lstcon_nodes_getent((server ? 
srvlst: clilst), - index_p, ndent_p, dents_up); - return rc; - } - - /* non-verbose query */ - LIBCFS_ALLOC(entp, sizeof(lstcon_test_batch_ent_t)); - if (entp == NULL) - return -ENOMEM; - - memset(entp, 0, sizeof(lstcon_test_batch_ent_t)); - - if (test == NULL) { - entp->u.tbe_batch.bae_ntest = bat->bat_ntest; - entp->u.tbe_batch.bae_state = bat->bat_state; - - } else { - - entp->u.tbe_test.tse_type = test->tes_type; - entp->u.tbe_test.tse_loop = test->tes_loop; - entp->u.tbe_test.tse_concur = test->tes_concur; - } - - list_for_each_entry(ndl, clilst, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle); - - list_for_each_entry(ndl, srvlst, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle); - - rc = copy_to_user(ent_up, entp, - sizeof(lstcon_test_batch_ent_t)) ? -EFAULT : 0; - - LIBCFS_FREE(entp, sizeof(lstcon_test_batch_ent_t)); - - return rc; -} - -int -lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg) -{ - switch (transop) { - case LST_TRANS_TSBRUN: - if (nd->nd_state != LST_NODE_ACTIVE) - return -ENETDOWN; - break; - - case LST_TRANS_TSBSTOP: - if (nd->nd_state != LST_NODE_ACTIVE) - return 0; - break; - - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - break; - } - - return 1; -} - -static int -lstcon_batch_op(lstcon_batch_t *bat, int transop, struct list_head *result_up) -{ - lstcon_rpc_trans_t *trans; - int rc; - - rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list, - &bat->bat_trans_list, transop, - bat, lstcon_batrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_batch_run(char *name, int timeout, struct list_head *result_up) -{ - lstcon_batch_t *bat; - int rc; - - if (lstcon_batch_find(name, &bat) != 0) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - 
return -ENOENT; - } - - bat->bat_arg = timeout; - - rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up); - - /* mark batch as running if it's started in any node */ - if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0) != 0) - bat->bat_state = LST_BATCH_RUNNING; - - return rc; -} - -int -lstcon_batch_stop(char *name, int force, struct list_head *result_up) -{ - lstcon_batch_t *bat; - int rc; - - if (lstcon_batch_find(name, &bat) != 0) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - bat->bat_arg = force; - - rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up); - - /* mark batch as stopped if all RPCs finished */ - if (lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0) == 0) - bat->bat_state = LST_BATCH_IDLE; - - return rc; -} - -static void -lstcon_batch_destroy(lstcon_batch_t *bat) -{ - lstcon_ndlink_t *ndl; - lstcon_test_t *test; - int i; - - list_del(&bat->bat_link); - - while (!list_empty(&bat->bat_test_list)) { - test = list_entry(bat->bat_test_list.next, - lstcon_test_t, tes_link); - LASSERT (list_empty(&test->tes_trans_list)); - - list_del(&test->tes_link); - - lstcon_group_put(test->tes_src_grp); - lstcon_group_put(test->tes_dst_grp); - - LIBCFS_FREE(test, offsetof(lstcon_test_t, - tes_param[test->tes_paramlen])); - } - - LASSERT (list_empty(&bat->bat_trans_list)); - - while (!list_empty(&bat->bat_cli_list)) { - ndl = list_entry(bat->bat_cli_list.next, - lstcon_ndlink_t, ndl_link); - list_del_init(&ndl->ndl_link); - - lstcon_ndlink_release(ndl); - } - - while (!list_empty(&bat->bat_srv_list)) { - ndl = list_entry(bat->bat_srv_list.next, - lstcon_ndlink_t, ndl_link); - list_del_init(&ndl->ndl_link); - - lstcon_ndlink_release(ndl); - } - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - LASSERT (list_empty(&bat->bat_cli_hash[i])); - LASSERT (list_empty(&bat->bat_srv_hash[i])); - } - - LIBCFS_FREE(bat->bat_cli_hash, - sizeof(struct list_head) * LST_NODE_HASHSIZE); - LIBCFS_FREE(bat->bat_srv_hash, - sizeof(struct list_head) * 
LST_NODE_HASHSIZE); - LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); -} - -int -lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg) -{ - lstcon_test_t *test; - lstcon_batch_t *batch; - lstcon_ndlink_t *ndl; - struct list_head *hash; - struct list_head *head; - - test = (lstcon_test_t *)arg; - LASSERT (test != NULL); - - batch = test->tes_batch; - LASSERT (batch != NULL); - - if (test->tes_oneside && - transop == LST_TRANS_TSBSRVADD) - return 0; - - if (nd->nd_state != LST_NODE_ACTIVE) - return -ENETDOWN; - - if (transop == LST_TRANS_TSBCLIADD) { - hash = batch->bat_cli_hash; - head = &batch->bat_cli_list; - - } else { - LASSERT (transop == LST_TRANS_TSBSRVADD); - - hash = batch->bat_srv_hash; - head = &batch->bat_srv_list; - } - - LASSERT (nd->nd_id.nid != LNET_NID_ANY); - - if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1) != 0) - return -ENOMEM; - - if (list_empty(&ndl->ndl_link)) - list_add_tail(&ndl->ndl_link, head); - - return 1; -} - -static int -lstcon_test_nodes_add(lstcon_test_t *test, struct list_head *result_up) -{ - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; - int transop; - int rc; - - LASSERT (test->tes_src_grp != NULL); - LASSERT (test->tes_dst_grp != NULL); - - transop = LST_TRANS_TSBSRVADD; - grp = test->tes_dst_grp; -again: - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &test->tes_trans_list, transop, - test, lstcon_testrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - if (lstcon_trans_stat()->trs_rpc_errno != 0 || - lstcon_trans_stat()->trs_fwk_errno != 0) { - lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* return if any error */ - CDEBUG(D_NET, "Failed to add test %s, " - "RPC error %d, framework error %d\n", - transop == LST_TRANS_TSBCLIADD ? 
"client" : "server", - lstcon_trans_stat()->trs_rpc_errno, - lstcon_trans_stat()->trs_fwk_errno); - - return rc; - } - - lstcon_rpc_trans_destroy(trans); - - if (transop == LST_TRANS_TSBCLIADD) - return rc; - - transop = LST_TRANS_TSBCLIADD; - grp = test->tes_src_grp; - test->tes_cliidx = 0; - - /* requests to test clients */ - goto again; -} - -int -lstcon_test_add(char *name, int type, int loop, int concur, - int dist, int span, char *src_name, char * dst_name, - void *param, int paramlen, int *retp, struct list_head *result_up) - -{ - lstcon_group_t *src_grp = NULL; - lstcon_group_t *dst_grp = NULL; - lstcon_test_t *test = NULL; - lstcon_batch_t *batch; - int rc; - - rc = lstcon_batch_find(name, &batch); - if (rc != 0) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return rc; - } - - if (batch->bat_state != LST_BATCH_IDLE) { - CDEBUG(D_NET, "Can't change running batch %s\n", name); - return rc; - } - - rc = lstcon_group_find(src_name, &src_grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", src_name); - goto out; - } - - rc = lstcon_group_find(dst_name, &dst_grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", dst_name); - goto out; - } - - if (dst_grp->grp_userland) - *retp = 1; - - LIBCFS_ALLOC(test, offsetof(lstcon_test_t, tes_param[paramlen])); - if (!test) { - CERROR("Can't allocate test descriptor\n"); - rc = -ENOMEM; - - goto out; - } - - memset(test, 0, offsetof(lstcon_test_t, tes_param[paramlen])); - test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id; - test->tes_batch = batch; - test->tes_type = type; - test->tes_oneside = 0; /* TODO */ - test->tes_loop = loop; - test->tes_concur = concur; - test->tes_stop_onerr = 1; /* TODO */ - test->tes_span = span; - test->tes_dist = dist; - test->tes_cliidx = 0; /* just used for creating RPC */ - test->tes_src_grp = src_grp; - test->tes_dst_grp = dst_grp; - CFS_INIT_LIST_HEAD(&test->tes_trans_list); - - if (param != NULL) { - test->tes_paramlen = paramlen; - memcpy(&test->tes_param[0], param, 
paramlen); - } - - rc = lstcon_test_nodes_add(test, result_up); - - if (rc != 0) - goto out; - - if (lstcon_trans_stat()->trs_rpc_errno != 0 || - lstcon_trans_stat()->trs_fwk_errno != 0) - CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, name); - - /* add to test list anyway, so user can check what's going on */ - list_add_tail(&test->tes_link, &batch->bat_test_list); - - batch->bat_ntest ++; - test->tes_hdr.tsb_index = batch->bat_ntest; - - /* hold groups so nobody can change them */ - return rc; -out: - if (test != NULL) - LIBCFS_FREE(test, offsetof(lstcon_test_t, tes_param[paramlen])); - - if (dst_grp != NULL) - lstcon_group_put(dst_grp); - - if (src_grp != NULL) - lstcon_group_put(src_grp); - - return rc; -} - -int -lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp) -{ - lstcon_test_t *test; - - list_for_each_entry(test, &batch->bat_test_list, tes_link) { - if (idx == test->tes_hdr.tsb_index) { - *testpp = test; - return 0; - } - } - - return -ENOENT; -} - -int -lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg, - lstcon_rpc_ent_t *ent_up) -{ - srpc_batch_reply_t *rep = &msg->msg_body.bat_reply; - - LASSERT (transop == LST_TRANS_TSBCLIQRY || - transop == LST_TRANS_TSBSRVQRY); - - /* positive errno, framework error code */ - if (copy_to_user(&ent_up->rpe_priv[0], - &rep->bar_active, sizeof(rep->bar_active))) - return -EFAULT; - - return 0; -} - -int -lstcon_test_batch_query(char *name, int testidx, int client, - int timeout, struct list_head *result_up) -{ - lstcon_rpc_trans_t *trans; - struct list_head *translist; - struct list_head *ndlist; - lstcon_tsb_hdr_t *hdr; - lstcon_batch_t *batch; - lstcon_test_t *test = NULL; - int transop; - int rc; - - rc = lstcon_batch_find(name, &batch); - if (rc != 0) { - CDEBUG(D_NET, "Can't find batch: %s\n", name); - return rc; - } - - if (testidx == 0) { - translist = &batch->bat_trans_list; - ndlist = &batch->bat_cli_list; - hdr = &batch->bat_hdr; - - } else { - /* query specified test only 
*/ - rc = lstcon_test_find(batch, testidx, &test); - if (rc != 0) { - CDEBUG(D_NET, "Can't find test: %d\n", testidx); - return rc; - } - - translist = &test->tes_trans_list; - ndlist = &test->tes_src_grp->grp_ndl_list; - hdr = &test->tes_hdr; - } - - transop = client ? LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY; - - rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr, - lstcon_batrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, timeout); - - if (testidx == 0 && /* query a batch, not a test */ - lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) == 0 && - lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0) == 0) { - /* all RPCs finished, and no active test */ - batch->bat_state = LST_BATCH_IDLE; - } - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_tsbrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_statrpc_readent(int transop, srpc_msg_t *msg, - lstcon_rpc_ent_t *ent_up) -{ - srpc_stat_reply_t *rep = &msg->msg_body.stat_reply; - sfw_counters_t *sfwk_stat; - srpc_counters_t *srpc_stat; - lnet_counters_t *lnet_stat; - - if (rep->str_status != 0) - return 0; - - sfwk_stat = (sfw_counters_t *)&ent_up->rpe_payload[0]; - srpc_stat = (srpc_counters_t *)((char *)sfwk_stat + sizeof(*sfwk_stat)); - lnet_stat = (lnet_counters_t *)((char *)srpc_stat + sizeof(*srpc_stat)); - - if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) || - copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) || - copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat))) - return -EFAULT; - - return 0; -} - -int -lstcon_ndlist_stat(struct list_head *ndlist, - int timeout, struct list_head *result_up) -{ - struct list_head head; - lstcon_rpc_trans_t *trans; - int rc; - - CFS_INIT_LIST_HEAD(&head); - - rc = lstcon_rpc_trans_ndlist(ndlist, &head, - LST_TRANS_STATQRY, NULL, NULL, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", 
rc); - return rc; - } - - timeout = (timeout > LST_TRANS_MIN_TIMEOUT) ? timeout : - LST_TRANS_MIN_TIMEOUT; - lstcon_rpc_trans_postwait(trans, timeout); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_statrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_group_stat(char *grp_name, int timeout, struct list_head *result_up) -{ - lstcon_group_t *grp; - int rc; - - rc = lstcon_group_find(grp_name, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Can't find group %s\n", grp_name); - return rc; - } - - rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up); - - lstcon_group_put(grp); - - return rc; -} - -int -lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, - int timeout, struct list_head *result_up) -{ - lstcon_ndlink_t *ndl; - lstcon_group_t *tmp; - lnet_process_id_t id; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc != 0) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0 ; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* add to tmp group */ - rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2); - if (rc != 0) { - CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET, - "Failed to find or create %s: %d\n", - libcfs_id2str(id), rc); - break; - } - } - - if (rc != 0) { - lstcon_group_put(tmp); - return rc; - } - - rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up); - - lstcon_group_put(tmp); - - return rc; -} - -int -lstcon_debug_ndlist(struct list_head *ndlist, - struct list_head *translist, - int timeout, struct list_head *result_up) -{ - lstcon_rpc_trans_t *trans; - int rc; - - rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY, - NULL, lstcon_sesrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - timeout = (timeout > LST_TRANS_MIN_TIMEOUT) ? 
timeout : - LST_TRANS_MIN_TIMEOUT; - - lstcon_rpc_trans_postwait(trans, timeout); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_sesrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_session_debug(int timeout, struct list_head *result_up) -{ - return lstcon_debug_ndlist(&console_session.ses_ndl_list, - NULL, timeout, result_up); -} - -int -lstcon_batch_debug(int timeout, char *name, - int client, struct list_head *result_up) -{ - lstcon_batch_t *bat; - int rc; - - rc = lstcon_batch_find(name, &bat); - if (rc != 0) - return -ENOENT; - - rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list : - &bat->bat_srv_list, - NULL, timeout, result_up); - - return rc; -} - -int -lstcon_group_debug(int timeout, char *name, - struct list_head *result_up) -{ - lstcon_group_t *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc != 0) - return -ENOENT; - - rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL, - timeout, result_up); - lstcon_group_put(grp); - - return rc; -} - -int -lstcon_nodes_debug(int timeout, - int count, lnet_process_id_t *ids_up, - struct list_head *result_up) -{ - lnet_process_id_t id; - lstcon_ndlink_t *ndl; - lstcon_group_t *grp; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &grp); - if (rc != 0) { - CDEBUG(D_NET, "Out of memory\n"); - return rc; - } - - for (i = 0; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* node is added to tmp group */ - rc = lstcon_group_ndlink_find(grp, id, &ndl, 1); - if (rc != 0) { - CERROR("Can't create node link\n"); - break; - } - } - - if (rc != 0) { - lstcon_group_put(grp); - return rc; - } - - rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL, - timeout, result_up); - - lstcon_group_put(grp); - - return rc; -} - -int -lstcon_session_match(lst_sid_t sid) -{ - return (console_session.ses_id.ses_nid == sid.ses_nid && - console_session.ses_id.ses_stamp == sid.ses_stamp) ? 
1: 0; -} - -static void -lstcon_new_session_id(lst_sid_t *sid) -{ - lnet_process_id_t id; - - LASSERT (console_session.ses_state == LST_SESSION_NONE); - - LNetGetId(1, &id); - sid->ses_nid = id.nid; - sid->ses_stamp = cfs_time_current(); -} - -extern srpc_service_t lstcon_acceptor_service; - -int -lstcon_session_new(char *name, int key, - int timeout,int force, lst_sid_t *sid_up) -{ - int rc = 0; - int i; - - if (console_session.ses_state != LST_SESSION_NONE) { - /* session exists */ - if (!force) { - CERROR("Session %s already exists\n", - console_session.ses_name); - return -EEXIST; - } - - rc = lstcon_session_end(); - - /* lstcon_session_end() only return local error */ - if (rc != 0) - return rc; - } - - for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) { - LASSERT (list_empty(&console_session.ses_ndl_hash[i])); - } - - rc = lstcon_batch_add(LST_DEFAULT_BATCH); - if (rc != 0) - return rc; - - rc = lstcon_rpc_pinger_start(); - if (rc != 0) { - lstcon_batch_t *bat; - - lstcon_batch_find(LST_DEFAULT_BATCH, &bat); - lstcon_batch_destroy(bat); - - return rc; - } - - lstcon_new_session_id(&console_session.ses_id); - - console_session.ses_key = key; - console_session.ses_state = LST_SESSION_ACTIVE; - console_session.ses_force = !!force; - console_session.ses_timeout = (timeout <= 0)? 
LST_CONSOLE_TIMEOUT: - timeout; - strcpy(console_session.ses_name, name); - - if (copy_to_user(sid_up, &console_session.ses_id, - sizeof(lst_sid_t)) == 0) - return rc; - - lstcon_session_end(); - - return -EFAULT; -} - -int -lstcon_session_info(lst_sid_t *sid_up, int *key_up, - lstcon_ndlist_ent_t *ndinfo_up, char *name_up, int len) -{ - lstcon_ndlist_ent_t *entp; - lstcon_ndlink_t *ndl; - int rc = 0; - - if (console_session.ses_state != LST_SESSION_ACTIVE) - return -ESRCH; - - LIBCFS_ALLOC(entp, sizeof(*entp)); - if (entp == NULL) - return -ENOMEM; - - memset(entp, 0, sizeof(*entp)); - - list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, entp); - - if (copy_to_user(sid_up, &console_session.ses_id, sizeof(lst_sid_t)) || - copy_to_user(key_up, &console_session.ses_key, sizeof(int)) || - copy_to_user(ndinfo_up, entp, sizeof(*entp)) || - copy_to_user(name_up, console_session.ses_name, len)) - rc = -EFAULT; - - LIBCFS_FREE(entp, sizeof(*entp)); - - return rc; -} - -int -lstcon_session_end() -{ - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; - lstcon_batch_t *bat; - int rc = 0; - - LASSERT (console_session.ses_state == LST_SESSION_ACTIVE); - - rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list, - NULL, LST_TRANS_SESEND, NULL, - lstcon_sesrpc_condition, &trans); - if (rc != 0) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - console_session.ses_shutdown = 1; - - lstcon_rpc_pinger_stop(); - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - lstcon_rpc_trans_destroy(trans); - /* User can do nothing even rpc failed, so go on */ - - /* waiting for orphan rpcs to die */ - lstcon_rpc_cleanup_wait(); - - console_session.ses_id = LST_INVALID_SID; - console_session.ses_state = LST_SESSION_NONE; - console_session.ses_key = 0; - console_session.ses_force = 0; - - /* destroy all batches */ - while (!list_empty(&console_session.ses_bat_list)) { - bat = 
list_entry(console_session.ses_bat_list.next, - lstcon_batch_t, bat_link); - - lstcon_batch_destroy(bat); - } - - /* destroy all groups */ - while (!list_empty(&console_session.ses_grp_list)) { - grp = list_entry(console_session.ses_grp_list.next, - lstcon_group_t, grp_link); - LASSERT (grp->grp_ref == 1); - - lstcon_group_put(grp); - } - - /* all nodes should be released */ - LASSERT (list_empty(&console_session.ses_ndl_list)); - - console_session.ses_shutdown = 0; - console_session.ses_expired = 0; - - return rc; -} - -static int -lstcon_acceptor_handle (srpc_server_rpc_t *rpc) -{ - srpc_msg_t *rep = &rpc->srpc_replymsg; - srpc_msg_t *req = &rpc->srpc_reqstbuf->buf_msg; - srpc_join_reqst_t *jreq = &req->msg_body.join_reqst; - srpc_join_reply_t *jrep = &rep->msg_body.join_reply; - lstcon_group_t *grp = NULL; - lstcon_ndlink_t *ndl; - int rc = 0; - - sfw_unpack_message(req); - - mutex_down(&console_session.ses_mutex); - - jrep->join_sid = console_session.ses_id; - - if (console_session.ses_id.ses_nid == LNET_NID_ANY) { - jrep->join_status = ESRCH; - goto out; - } - - if (jreq->join_sid.ses_nid != LNET_NID_ANY && - !lstcon_session_match(jreq->join_sid)) { - jrep->join_status = EBUSY; - goto out; - } - - if (lstcon_group_find(jreq->join_group, &grp) != 0) { - rc = lstcon_group_alloc(jreq->join_group, &grp); - if (rc != 0) { - CERROR("Out of memory\n"); - goto out; - } - - list_add_tail(&grp->grp_link, - &console_session.ses_grp_list); - lstcon_group_addref(grp); - } - - if (grp->grp_ref > 2) { - /* Group in using */ - jrep->join_status = EBUSY; - goto out; - } - - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0); - if (rc == 0) { - jrep->join_status = EEXIST; - goto out; - } - - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1); - if (rc != 0) { - CERROR("Out of memory\n"); - goto out; - } - - ndl->ndl_node->nd_state = LST_NODE_ACTIVE; - ndl->ndl_node->nd_timeout = console_session.ses_timeout; - - if (grp->grp_userland == 0) - grp->grp_userland = 
1; - - strcpy(jrep->join_session, console_session.ses_name); - jrep->join_timeout = console_session.ses_timeout; - jrep->join_status = 0; - -out: - if (grp != NULL) - lstcon_group_put(grp); - - mutex_up(&console_session.ses_mutex); - - return rc; -} - -srpc_service_t lstcon_acceptor_service = -{ - .sv_name = "join session", - .sv_handler = lstcon_acceptor_handle, - .sv_bulk_ready = NULL, - .sv_id = SRPC_SERVICE_JOIN, - .sv_concur = SFW_SERVICE_CONCURRENCY, -}; - -extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data); - -DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry); - -/* initialize console */ -int -lstcon_console_init(void) -{ - int i; - int n; - int rc; - - memset(&console_session, 0, sizeof(lstcon_session_t)); - - console_session.ses_id = LST_INVALID_SID; - console_session.ses_state = LST_SESSION_NONE; - console_session.ses_timeout = 0; - console_session.ses_force = 0; - console_session.ses_expired = 0; - console_session.ses_laststamp = cfs_time_current_sec(); - - init_mutex(&console_session.ses_mutex); - - CFS_INIT_LIST_HEAD(&console_session.ses_ndl_list); - CFS_INIT_LIST_HEAD(&console_session.ses_grp_list); - CFS_INIT_LIST_HEAD(&console_session.ses_bat_list); - CFS_INIT_LIST_HEAD(&console_session.ses_trans_list); - - LIBCFS_ALLOC(console_session.ses_ndl_hash, - sizeof(struct list_head) * LST_GLOBAL_HASHSIZE); - if (console_session.ses_ndl_hash == NULL) - return -ENOMEM; - - for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) - CFS_INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]); - - rc = srpc_add_service(&lstcon_acceptor_service); - LASSERT (rc != -EBUSY); - if (rc != 0) { - LIBCFS_FREE(console_session.ses_ndl_hash, - sizeof(struct list_head) * LST_GLOBAL_HASHSIZE); - return rc; - } - - n = srpc_service_add_buffers(&lstcon_acceptor_service, SFW_POST_BUFFERS); - if (n != SFW_POST_BUFFERS) { - rc = -ENOMEM; - goto out; - } - - rc = libcfs_register_ioctl(&lstcon_ioctl_handler); - - if (rc == 0) { - lstcon_rpc_module_init(); - 
return 0; - } - -out: - srpc_shutdown_service(&lstcon_acceptor_service); - srpc_remove_service(&lstcon_acceptor_service); - - LIBCFS_FREE(console_session.ses_ndl_hash, - sizeof(struct list_head) * LST_GLOBAL_HASHSIZE); - - srpc_wait_service_shutdown(&lstcon_acceptor_service); - - return rc; -} - -int -lstcon_console_fini(void) -{ - int i; - - mutex_down(&console_session.ses_mutex); - - libcfs_deregister_ioctl(&lstcon_ioctl_handler); - - srpc_shutdown_service(&lstcon_acceptor_service); - srpc_remove_service(&lstcon_acceptor_service); - - if (console_session.ses_state != LST_SESSION_NONE) - lstcon_session_end(); - - lstcon_rpc_module_fini(); - - mutex_up(&console_session.ses_mutex); - - LASSERT (list_empty(&console_session.ses_ndl_list)); - LASSERT (list_empty(&console_session.ses_grp_list)); - LASSERT (list_empty(&console_session.ses_bat_list)); - LASSERT (list_empty(&console_session.ses_trans_list)); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - LASSERT (list_empty(&console_session.ses_ndl_hash[i])); - } - - LIBCFS_FREE(console_session.ses_ndl_hash, - sizeof(struct list_head) * LST_GLOBAL_HASHSIZE); - - srpc_wait_service_shutdown(&lstcon_acceptor_service); - - return 0; -} - -#endif diff --git a/lnet/selftest/console.h b/lnet/selftest/console.h deleted file mode 100644 index 222f542230aa2dd589de9cad2b17c7422bc91ec7..0000000000000000000000000000000000000000 --- a/lnet/selftest/console.h +++ /dev/null @@ -1,190 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * kernel structure for LST console - */ - -#ifndef __LST_CONSOLE_H__ -#define __LST_CONSOLE_H__ - -#ifdef __KERNEL__ - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-types.h> -#include <lnet/lnetst.h> -#include "selftest.h" -#include "conrpc.h" - -typedef struct lstcon_node { - lnet_process_id_t nd_id; /* id 
of the node */ - int nd_ref; /* reference count */ - int nd_state; /* state of the node */ - int nd_timeout; /* session timeout */ - cfs_time_t nd_stamp; /* timestamp of last replied RPC */ - struct lstcon_rpc nd_ping; /* ping rpc */ -} lstcon_node_t; /*** node descriptor */ - -typedef struct { - struct list_head ndl_link; /* chain on list */ - struct list_head ndl_hlink; /* chain on hash */ - lstcon_node_t *ndl_node; /* pointer to node */ -} lstcon_ndlink_t; /*** node link descriptor */ - -typedef struct { - struct list_head grp_link; /* chain on global group list */ - int grp_ref; /* reference count */ - int grp_userland; /* has userland nodes */ - int grp_nnode; /* # of nodes */ - char grp_name[LST_NAME_SIZE]; /* group name */ - - struct list_head grp_trans_list; /* transaction list */ - struct list_head grp_ndl_list; /* nodes list */ - struct list_head grp_ndl_hash[0];/* hash table for nodes */ -} lstcon_group_t; /*** (alias of nodes) group descriptor */ - -#define LST_BATCH_IDLE 0xB0 /* idle batch */ -#define LST_BATCH_RUNNING 0xB1 /* running batch */ - -typedef struct lstcon_tsb_hdr { - lst_bid_t tsb_id; /* batch ID */ - int tsb_index; /* test index */ -} lstcon_tsb_hdr_t; - -typedef struct { - lstcon_tsb_hdr_t bat_hdr; /* test_batch header */ - struct list_head bat_link; /* chain on session's batches list */ - int bat_ntest; /* # of test */ - int bat_state; /* state of the batch */ - int bat_arg; /* parameter for run|stop, timeout for run, force for stop */ - char bat_name[LST_NAME_SIZE]; /* name of batch */ - - struct list_head bat_test_list; /* list head of tests (lstcon_test_t) */ - struct list_head bat_trans_list; /* list head of transaction */ - struct list_head bat_cli_list; /* list head of client nodes (lstcon_node_t) */ - struct list_head *bat_cli_hash; /* hash table of client nodes */ - struct list_head bat_srv_list; /* list head of server nodes */ - struct list_head *bat_srv_hash; /* hash table of server nodes */ -} lstcon_batch_t; /*** (tests ) 
batch descritptor */ - -typedef struct lstcon_test { - lstcon_tsb_hdr_t tes_hdr; /* test batch header */ - struct list_head tes_link; /* chain on batch's tests list */ - lstcon_batch_t *tes_batch; /* pointer to batch */ - - int tes_type; /* type of the test, i.e: bulk, ping */ - int tes_stop_onerr; /* stop on error */ - int tes_oneside; /* one-sided test */ - int tes_concur; /* concurrency */ - int tes_loop; /* loop count */ - int tes_dist; /* nodes distribution of target group */ - int tes_span; /* nodes span of target group */ - int tes_cliidx; /* client index, used for RPC creating */ - - struct list_head tes_trans_list; /* transaction list */ - lstcon_group_t *tes_src_grp; /* group run the test */ - lstcon_group_t *tes_dst_grp; /* target group */ - - int tes_paramlen; /* test parameter length */ - char tes_param[0]; /* test parameter */ -} lstcon_test_t; /*** a single test descriptor */ - -#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */ -#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */ - -#define LST_SESSION_NONE 0x0 /* no session */ -#define LST_SESSION_ACTIVE 0x1 /* working session */ - -#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */ - -typedef struct { - struct semaphore ses_mutex; /* lock for session, only one thread can enter session */ - lst_sid_t ses_id; /* global session id */ - int ses_key; /* local session key */ - int ses_state; /* state of session */ - int ses_timeout; /* timeout in seconds */ - time_t ses_laststamp; /* last operation stamp (seconds) */ - int ses_force:1; /* force creating */ - int ses_shutdown:1; /* session is shutting down */ - int ses_expired:1; /* console is timedout */ - __u64 ses_id_cookie; /* batch id cookie */ - char ses_name[LST_NAME_SIZE]; /* session name */ - lstcon_rpc_trans_t *ses_ping; /* session pinger */ - stt_timer_t ses_ping_timer; /* timer for pinger */ - lstcon_trans_stat_t ses_trans_stat; /* transaction stats */ - - struct list_head ses_trans_list; /* 
global list of transaction */ - struct list_head ses_grp_list; /* global list of groups */ - struct list_head ses_bat_list; /* global list of batches */ - struct list_head ses_ndl_list; /* global list of nodes */ - struct list_head *ses_ndl_hash; /* hash table of nodes */ - - spinlock_t ses_rpc_lock; /* serialize */ - atomic_t ses_rpc_counter;/* # of initialized RPCs */ - struct list_head ses_rpc_freelist; /* idle console rpc */ -} lstcon_session_t; /*** session descriptor */ - -extern lstcon_session_t console_session; -static inline lstcon_trans_stat_t * -lstcon_trans_stat(void) -{ - return &console_session.ses_trans_stat; -} - -static inline struct list_head * -lstcon_id2hash (lnet_process_id_t id, struct list_head *hash) -{ - unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; - - return &hash[idx]; -} - -extern int lstcon_session_match(lst_sid_t sid); -extern int lstcon_session_new(char *name, int key, - int timeout, int flags, lst_sid_t *sid_up); -extern int lstcon_session_info(lst_sid_t *sid_up, int *key, - lstcon_ndlist_ent_t *entp, char *name_up, int len); -extern int lstcon_session_end(void); -extern int lstcon_session_debug(int timeout, struct list_head *result_up); -extern int lstcon_batch_debug(int timeout, char *name, - int client, struct list_head *result_up); -extern int lstcon_group_debug(int timeout, char *name, - struct list_head *result_up); -extern int lstcon_nodes_debug(int timeout, int nnd, lnet_process_id_t *nds_up, - struct list_head *result_up); -extern int lstcon_group_add(char *name); -extern int lstcon_group_del(char *name); -extern int lstcon_group_clean(char *name, int args); -extern int lstcon_group_refresh(char *name, struct list_head *result_up); -extern int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t *nds_up, - struct list_head *result_up); -extern int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t *nds_up, - struct list_head *result_up); -extern int lstcon_group_info(char *name, lstcon_ndlist_ent_t 
*gent_up, - int *index_p, int *ndent_p, lstcon_node_ent_t *ndents_up); -extern int lstcon_group_list(int idx, int len, char *name_up); -extern int lstcon_batch_add(char *name); -extern int lstcon_batch_run(char *name, int timeout, struct list_head *result_up); -extern int lstcon_batch_stop(char *name, int force, struct list_head *result_up); -extern int lstcon_test_batch_query(char *name, int testidx, - int client, int timeout, - struct list_head *result_up); -extern int lstcon_batch_del(char *name); -extern int lstcon_batch_list(int idx, int namelen, char *name_up); -extern int lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up, - int server, int testidx, int *index_p, - int *ndent_p, lstcon_node_ent_t *dents_up); -extern int lstcon_group_stat(char *grp_name, int timeout, - struct list_head *result_up); -extern int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up, - int timeout, struct list_head *result_up); -extern int lstcon_test_add(char *name, int type, int loop, int concur, - int dist, int span, char *src_name, char * dst_name, - void *param, int paramlen, int *retp, struct list_head *result_up); -#endif - -#endif diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c deleted file mode 100644 index 3fde9713cfcff1cb6b1a5931318c631e8a93f951..0000000000000000000000000000000000000000 --- a/lnet/selftest/framework.c +++ /dev/null @@ -1,1663 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * Authors: Isaac Huang <isaac@clusterfs.com> - * Liang Zhen <liangzhen@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - -int brw_inject_errors = 0; -CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644, - "# data errors to inject randomly, zero by default"); - -static int session_timeout = 100; -CFS_MODULE_PARM(session_timeout, "i", int, 0444, - "test session timeout in seconds (100 by default, 0 == never)"); - -#define SFW_TEST_CONCURRENCY 128 -#define SFW_TEST_RPC_TIMEOUT 64 -#define SFW_CLIENT_RPC_TIMEOUT 64 /* in seconds */ -#define SFW_EXTRA_TEST_BUFFERS 8 /* tolerate buggy peers with extra buffers */ - -#define sfw_test_buffers(tsi) ((tsi)->tsi_loop + SFW_EXTRA_TEST_BUFFERS) - -#define sfw_unpack_id(id) \ -do { \ - __swab64s(&(id).nid); \ - __swab32s(&(id).pid); \ -} while (0) - -#define sfw_unpack_sid(sid) \ -do { \ - __swab64s(&(sid).ses_nid); \ - __swab64s(&(sid).ses_stamp); \ -} while (0) - -#define sfw_unpack_fw_counters(fc) \ -do { \ - __swab32s(&(fc).brw_errors); \ - __swab32s(&(fc).ping_errors); \ - __swab32s(&(fc).active_tests); \ - __swab32s(&(fc).active_batches); \ - __swab32s(&(fc).zombie_sessions); \ -} while (0) - -#define sfw_unpack_rpc_counters(rc) \ -do { \ - __swab32s(&(rc).errors); \ - __swab32s(&(rc).rpcs_sent); \ - __swab32s(&(rc).rpcs_rcvd); \ - __swab32s(&(rc).rpcs_dropped); \ - __swab32s(&(rc).rpcs_expired); \ - __swab64s(&(rc).bulk_get); \ - __swab64s(&(rc).bulk_put); \ -} while (0) - -#define sfw_unpack_lnet_counters(lc) \ -do { \ - __swab32s(&(lc).errors); \ - __swab32s(&(lc).msgs_max); \ - __swab32s(&(lc).msgs_alloc); \ - __swab32s(&(lc).send_count); \ - __swab32s(&(lc).recv_count); \ - __swab32s(&(lc).drop_count); \ - __swab32s(&(lc).route_count); \ - __swab64s(&(lc).send_length); \ - __swab64s(&(lc).recv_length); \ - __swab64s(&(lc).drop_length); \ - __swab64s(&(lc).route_length); \ -} while (0) - -#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive) != 0) -#define sfw_batch_active(b) 
(atomic_read(&(b)->bat_nactive) != 0) - -struct smoketest_framework { - struct list_head fw_zombie_rpcs; /* RPCs to be recycled */ - struct list_head fw_zombie_sessions; /* stopping sessions */ - struct list_head fw_tests; /* registered test cases */ - atomic_t fw_nzombies; /* # zombie sessions */ - spinlock_t fw_lock; /* serialise */ - sfw_session_t *fw_session; /* _the_ session */ - int fw_shuttingdown; /* shutdown in progress */ - srpc_server_rpc_t *fw_active_srpc; /* running RPC */ -} sfw_data; - -/* forward ref's */ -int sfw_stop_batch (sfw_batch_t *tsb, int force); -void sfw_destroy_session (sfw_session_t *sn); - -static inline sfw_test_case_t * -sfw_find_test_case(int id) -{ - sfw_test_case_t *tsc; - - LASSERT (id <= SRPC_SERVICE_MAX_ID); - LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID); - - list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) { - if (tsc->tsc_srv_service->sv_id == id) - return tsc; - } - - return NULL; -} - -static int -sfw_register_test (srpc_service_t *service, sfw_test_client_ops_t *cliops) -{ - sfw_test_case_t *tsc; - - if (sfw_find_test_case(service->sv_id) != NULL) { - CERROR ("Failed to register test %s (%d)\n", - service->sv_name, service->sv_id); - return -EEXIST; - } - - LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t)); - if (tsc == NULL) - return -ENOMEM; - - memset(tsc, 0, sizeof(sfw_test_case_t)); - tsc->tsc_cli_ops = cliops; - tsc->tsc_srv_service = service; - - list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests); - return 0; -} - -void -sfw_add_session_timer (void) -{ - sfw_session_t *sn = sfw_data.fw_session; - stt_timer_t *timer = &sn->sn_timer; - - LASSERT (!sfw_data.fw_shuttingdown); - - if (sn == NULL || sn->sn_timeout == 0) - return; - - LASSERT (!sn->sn_timer_active); - - sn->sn_timer_active = 1; - timer->stt_expires = cfs_time_add(sn->sn_timeout, - cfs_time_current_sec()); - stt_add_timer(timer); - return; -} - -int -sfw_del_session_timer (void) -{ - sfw_session_t *sn = sfw_data.fw_session; - - if (sn == NULL || 
!sn->sn_timer_active) - return 0; - - LASSERT (sn->sn_timeout != 0); - - if (stt_del_timer(&sn->sn_timer)) { /* timer defused */ - sn->sn_timer_active = 0; - return 0; - } - -#ifndef __KERNEL__ - /* Racing is impossible in single-threaded userland selftest */ - LBUG(); -#endif - return EBUSY; /* racing with sfw_session_expired() */ -} - -/* called with sfw_data.fw_lock held */ -static void -sfw_deactivate_session (void) -{ - sfw_session_t *sn = sfw_data.fw_session; - int nactive = 0; - sfw_batch_t *tsb; - - if (sn == NULL) return; - - LASSERT (!sn->sn_timer_active); - - sfw_data.fw_session = NULL; - atomic_inc(&sfw_data.fw_nzombies); - list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions); - - list_for_each_entry (tsb, &sn->sn_batches, bat_list) { - if (sfw_batch_active(tsb)) { - nactive++; - sfw_stop_batch(tsb, 1); - } - } - - if (nactive != 0) - return; /* wait for active batches to stop */ - - list_del_init(&sn->sn_list); - spin_unlock(&sfw_data.fw_lock); - - sfw_destroy_session(sn); - - spin_lock(&sfw_data.fw_lock); - return; -} - -#ifndef __KERNEL__ - -int -sfw_session_removed (void) -{ - return (sfw_data.fw_session == NULL) ? 1 : 0; -} - -#endif - -void -sfw_session_expired (void *data) -{ - sfw_session_t *sn = data; - - spin_lock(&sfw_data.fw_lock); - - LASSERT (sn->sn_timer_active); - LASSERT (sn == sfw_data.fw_session); - - CWARN ("Session expired! 
sid: %s-"LPU64", name: %s\n", - libcfs_nid2str(sn->sn_id.ses_nid), - sn->sn_id.ses_stamp, &sn->sn_name[0]); - - sn->sn_timer_active = 0; - sfw_deactivate_session(); - - spin_unlock(&sfw_data.fw_lock); - return; -} - -static inline void -sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name) -{ - stt_timer_t *timer = &sn->sn_timer; - - memset(sn, 0, sizeof(sfw_session_t)); - CFS_INIT_LIST_HEAD(&sn->sn_list); - CFS_INIT_LIST_HEAD(&sn->sn_batches); - atomic_set(&sn->sn_brw_errors, 0); - atomic_set(&sn->sn_ping_errors, 0); - strncpy(&sn->sn_name[0], name, LST_NAME_SIZE); - - sn->sn_timer_active = 0; - sn->sn_id = sid; - sn->sn_timeout = session_timeout; - - timer->stt_data = sn; - timer->stt_func = sfw_session_expired; - CFS_INIT_LIST_HEAD(&timer->stt_list); -} - -/* completion handler for incoming framework RPCs */ -void -sfw_server_rpc_done (srpc_server_rpc_t *rpc) -{ - srpc_service_t *sv = rpc->srpc_service; - int status = rpc->srpc_status; - - CDEBUG (D_NET, - "Incoming framework RPC done: " - "service %s, peer %s, status %s:%d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.wi_state), - status); - - if (rpc->srpc_bulk != NULL) - sfw_free_pages(rpc); - return; -} - -void -sfw_client_rpc_fini (srpc_client_rpc_t *rpc) -{ - LASSERT (rpc->crpc_bulk.bk_niov == 0); - LASSERT (list_empty(&rpc->crpc_list)); - LASSERT (atomic_read(&rpc->crpc_refcount) == 0); -#ifndef __KERNEL__ - LASSERT (rpc->crpc_bulk.bk_pages == NULL); -#endif - - CDEBUG (D_NET, - "Outgoing framework RPC done: " - "service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.wi_state), - rpc->crpc_aborted, rpc->crpc_status); - - spin_lock(&sfw_data.fw_lock); - - /* my callers must finish all RPCs before shutting me down */ - LASSERT (!sfw_data.fw_shuttingdown); - list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs); - - spin_unlock(&sfw_data.fw_lock); - return; -} - -sfw_batch_t * -sfw_find_batch 
(lst_bid_t bid) -{ - sfw_session_t *sn = sfw_data.fw_session; - sfw_batch_t *bat; - - LASSERT (sn != NULL); - - list_for_each_entry (bat, &sn->sn_batches, bat_list) { - if (bat->bat_id.bat_id == bid.bat_id) - return bat; - } - - return NULL; -} - -sfw_batch_t * -sfw_bid2batch (lst_bid_t bid) -{ - sfw_session_t *sn = sfw_data.fw_session; - sfw_batch_t *bat; - - LASSERT (sn != NULL); - - bat = sfw_find_batch(bid); - if (bat != NULL) - return bat; - - LIBCFS_ALLOC(bat, sizeof(sfw_batch_t)); - if (bat == NULL) - return NULL; - - bat->bat_error = 0; - bat->bat_session = sn; - bat->bat_id = bid; - atomic_set(&bat->bat_nactive, 0); - CFS_INIT_LIST_HEAD(&bat->bat_tests); - - list_add_tail(&bat->bat_list, &sn->sn_batches); - return bat; -} - -int -sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply) -{ - sfw_session_t *sn = sfw_data.fw_session; - sfw_counters_t *cnt = &reply->str_fw; - sfw_batch_t *bat; - - reply->str_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id; - - if (request->str_sid.ses_nid == LNET_NID_ANY) { - reply->str_status = EINVAL; - return 0; - } - - if (sn == NULL || !sfw_sid_equal(request->str_sid, sn->sn_id)) { - reply->str_status = ESRCH; - return 0; - } - - LNET_LOCK(); - reply->str_lnet = the_lnet.ln_counters; - LNET_UNLOCK(); - - srpc_get_counters(&reply->str_rpc); - - cnt->brw_errors = atomic_read(&sn->sn_brw_errors); - cnt->ping_errors = atomic_read(&sn->sn_ping_errors); - cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies); - - cnt->active_tests = cnt->active_batches = 0; - list_for_each_entry (bat, &sn->sn_batches, bat_list) { - int n = atomic_read(&bat->bat_nactive); - - if (n > 0) { - cnt->active_batches++; - cnt->active_tests += n; - } - } - - reply->str_status = 0; - return 0; -} - -int -sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) -{ - sfw_session_t *sn = sfw_data.fw_session; - - if (request->mksn_sid.ses_nid == LNET_NID_ANY) { - reply->mksn_sid = (sn == NULL) ? 
LST_INVALID_SID : sn->sn_id; - reply->mksn_status = EINVAL; - return 0; - } - - if (sn != NULL && !request->mksn_force) { - reply->mksn_sid = sn->sn_id; - reply->mksn_status = EBUSY; - strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE); - return 0; - } - - LIBCFS_ALLOC(sn, sizeof(sfw_session_t)); - if (sn == NULL) { - CERROR ("Dropping RPC (mksn) under memory pressure.\n"); - return -ENOMEM; - } - - sfw_init_session(sn, request->mksn_sid, &request->mksn_name[0]); - - spin_lock(&sfw_data.fw_lock); - - sfw_deactivate_session(); - LASSERT (sfw_data.fw_session == NULL); - sfw_data.fw_session = sn; - - spin_unlock(&sfw_data.fw_lock); - - reply->mksn_status = 0; - reply->mksn_sid = sn->sn_id; - reply->mksn_timeout = sn->sn_timeout; - return 0; -} - -int -sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply) -{ - sfw_session_t *sn = sfw_data.fw_session; - - reply->rmsn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id; - - if (request->rmsn_sid.ses_nid == LNET_NID_ANY) { - reply->rmsn_status = EINVAL; - return 0; - } - - if (sn == NULL || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) { - reply->rmsn_status = (sn == NULL) ? 
ESRCH : EBUSY; - return 0; - } - - spin_lock(&sfw_data.fw_lock); - sfw_deactivate_session(); - spin_unlock(&sfw_data.fw_lock); - - reply->rmsn_status = 0; - reply->rmsn_sid = LST_INVALID_SID; - LASSERT (sfw_data.fw_session == NULL); - return 0; -} - -int -sfw_debug_session (srpc_debug_reqst_t *request, srpc_debug_reply_t *reply) -{ - sfw_session_t *sn = sfw_data.fw_session; - - if (sn == NULL) { - reply->dbg_status = ESRCH; - reply->dbg_sid = LST_INVALID_SID; - return 0; - } - - reply->dbg_status = 0; - reply->dbg_sid = sn->sn_id; - reply->dbg_timeout = sn->sn_timeout; - strncpy(reply->dbg_name, &sn->sn_name[0], LST_NAME_SIZE); - - return 0; -} - -void -sfw_test_rpc_fini (srpc_client_rpc_t *rpc) -{ - sfw_test_unit_t *tsu = rpc->crpc_priv; - sfw_test_instance_t *tsi = tsu->tsu_instance; - - /* Called with hold of tsi->tsi_lock */ - LASSERT (list_empty(&rpc->crpc_list)); - list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs); -} - -int -sfw_load_test (sfw_test_instance_t *tsi) -{ - sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service); - int nrequired = sfw_test_buffers(tsi); - int nposted; - - LASSERT (tsc != NULL); - - if (tsi->tsi_is_client) { - tsi->tsi_ops = tsc->tsc_cli_ops; - return 0; - } - - nposted = srpc_service_add_buffers(tsc->tsc_srv_service, nrequired); - if (nposted != nrequired) { - CWARN ("Failed to reserve enough buffers: " - "service %s, %d needed, %d reserved\n", - tsc->tsc_srv_service->sv_name, nrequired, nposted); - srpc_service_remove_buffers(tsc->tsc_srv_service, nposted); - return -ENOMEM; - } - - CDEBUG (D_NET, "Reserved %d buffers for test %s\n", - nposted, tsc->tsc_srv_service->sv_name); - return 0; -} - -void -sfw_unload_test (sfw_test_instance_t *tsi) -{ - sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service); - - LASSERT (tsc != NULL); - - if (!tsi->tsi_is_client) - srpc_service_remove_buffers(tsc->tsc_srv_service, - sfw_test_buffers(tsi)); - return; -} - -void -sfw_destroy_test_instance (sfw_test_instance_t *tsi) -{ - 
srpc_client_rpc_t *rpc; - sfw_test_unit_t *tsu; - - if (!tsi->tsi_is_client) goto clean; - - tsi->tsi_ops->tso_fini(tsi); - - LASSERT (!tsi->tsi_stopping); - LASSERT (list_empty(&tsi->tsi_active_rpcs)); - LASSERT (!sfw_test_active(tsi)); - - while (!list_empty(&tsi->tsi_units)) { - tsu = list_entry(tsi->tsi_units.next, - sfw_test_unit_t, tsu_list); - list_del(&tsu->tsu_list); - LIBCFS_FREE(tsu, sizeof(*tsu)); - } - - while (!list_empty(&tsi->tsi_free_rpcs)) { - rpc = list_entry(tsi->tsi_free_rpcs.next, - srpc_client_rpc_t, crpc_list); - list_del(&rpc->crpc_list); - LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); - } - -clean: - sfw_unload_test(tsi); - LIBCFS_FREE(tsi, sizeof(*tsi)); - return; -} - -void -sfw_destroy_batch (sfw_batch_t *tsb) -{ - sfw_test_instance_t *tsi; - - LASSERT (!sfw_batch_active(tsb)); - LASSERT (list_empty(&tsb->bat_list)); - - while (!list_empty(&tsb->bat_tests)) { - tsi = list_entry(tsb->bat_tests.next, - sfw_test_instance_t, tsi_list); - list_del_init(&tsi->tsi_list); - sfw_destroy_test_instance(tsi); - } - - LIBCFS_FREE(tsb, sizeof(sfw_batch_t)); - return; -} - -void -sfw_destroy_session (sfw_session_t *sn) -{ - sfw_batch_t *batch; - - LASSERT (list_empty(&sn->sn_list)); - LASSERT (sn != sfw_data.fw_session); - - while (!list_empty(&sn->sn_batches)) { - batch = list_entry(sn->sn_batches.next, - sfw_batch_t, bat_list); - list_del_init(&batch->bat_list); - sfw_destroy_batch(batch); - } - - LIBCFS_FREE(sn, sizeof(*sn)); - atomic_dec(&sfw_data.fw_nzombies); - return; -} - -void -sfw_unpack_test_req (srpc_msg_t *msg) -{ - srpc_test_reqst_t *req = &msg->msg_body.tes_reqst; - - LASSERT (msg->msg_type == SRPC_MSG_TEST_REQST); - LASSERT (req->tsr_is_client); - - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - if (req->tsr_service == SRPC_SERVICE_BRW) { - test_bulk_req_t *bulk = &req->tsr_u.bulk; - - __swab32s(&bulk->blk_opc); - __swab32s(&bulk->blk_npg); - 
__swab32s(&bulk->blk_flags); - return; - } - - if (req->tsr_service == SRPC_SERVICE_PING) { - test_ping_req_t *ping = &req->tsr_u.ping; - - __swab32s(&ping->png_size); - __swab32s(&ping->png_flags); - return; - } - - LBUG (); - return; -} - -int -sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc) -{ - srpc_msg_t *msg = &rpc->srpc_reqstbuf->buf_msg; - srpc_test_reqst_t *req = &msg->msg_body.tes_reqst; - srpc_bulk_t *bk = rpc->srpc_bulk; - int ndest = req->tsr_ndest; - sfw_test_unit_t *tsu; - sfw_test_instance_t *tsi; - int i; - int rc; - - LIBCFS_ALLOC(tsi, sizeof(*tsi)); - if (tsi == NULL) { - CERROR ("Can't allocate test instance for batch: "LPU64"\n", - tsb->bat_id.bat_id); - return -ENOMEM; - } - - memset(tsi, 0, sizeof(*tsi)); - spin_lock_init(&tsi->tsi_lock); - atomic_set(&tsi->tsi_nactive, 0); - CFS_INIT_LIST_HEAD(&tsi->tsi_units); - CFS_INIT_LIST_HEAD(&tsi->tsi_free_rpcs); - CFS_INIT_LIST_HEAD(&tsi->tsi_active_rpcs); - - tsi->tsi_stopping = 0; - tsi->tsi_batch = tsb; - tsi->tsi_loop = req->tsr_loop; - tsi->tsi_concur = req->tsr_concur; - tsi->tsi_service = req->tsr_service; - tsi->tsi_is_client = !!(req->tsr_is_client); - tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr); - - rc = sfw_load_test(tsi); - if (rc != 0) { - LIBCFS_FREE(tsi, sizeof(*tsi)); - return rc; - } - - LASSERT (!sfw_batch_active(tsb)); - - if (!tsi->tsi_is_client) { - /* it's test server, just add it to tsb */ - list_add_tail(&tsi->tsi_list, &tsb->bat_tests); - return 0; - } - - LASSERT (bk != NULL); -#ifndef __KERNEL__ - LASSERT (bk->bk_pages != NULL); -#endif - LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= ndest); - LASSERT (bk->bk_len >= sizeof(lnet_process_id_t) * ndest); - - sfw_unpack_test_req(msg); - memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u)); - - for (i = 0; i < ndest; i++) { - lnet_process_id_t *dests; - lnet_process_id_t id; - int j; - -#ifdef __KERNEL__ - dests = cfs_page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page); - LASSERT (dests != NULL); /* my 
pages are within KVM always */ -#else - dests = cfs_page_address(bk->bk_pages[i / SFW_ID_PER_PAGE]); -#endif - id = dests[i % SFW_ID_PER_PAGE]; - if (msg->msg_magic != SRPC_MSG_MAGIC) - sfw_unpack_id(id); - - for (j = 0; j < tsi->tsi_concur; j++) { - LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t)); - if (tsu == NULL) { - rc = -ENOMEM; - CERROR ("Can't allocate tsu for %d\n", - tsi->tsi_service); - goto error; - } - - tsu->tsu_dest = id; - tsu->tsu_instance = tsi; - tsu->tsu_private = NULL; - list_add_tail(&tsu->tsu_list, &tsi->tsi_units); - } - } - - rc = tsi->tsi_ops->tso_init(tsi); - if (rc == 0) { - list_add_tail(&tsi->tsi_list, &tsb->bat_tests); - return 0; - } - -error: - LASSERT (rc != 0); - sfw_destroy_test_instance(tsi); - return rc; -} - -static void -sfw_test_unit_done (sfw_test_unit_t *tsu) -{ - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_batch_t *tsb = tsi->tsi_batch; - sfw_session_t *sn = tsb->bat_session; - - LASSERT (sfw_test_active(tsi)); - - if (!atomic_dec_and_test(&tsi->tsi_nactive)) - return; - - /* the test instance is done */ - spin_lock(&tsi->tsi_lock); - - tsi->tsi_stopping = 0; - - spin_unlock(&tsi->tsi_lock); - - spin_lock(&sfw_data.fw_lock); - - if (!atomic_dec_and_test(&tsb->bat_nactive) || /* tsb still active */ - sn == sfw_data.fw_session) { /* sn also active */ - spin_unlock(&sfw_data.fw_lock); - return; - } - - LASSERT (!list_empty(&sn->sn_list)); /* I'm a zombie! 
*/ - - list_for_each_entry (tsb, &sn->sn_batches, bat_list) { - if (sfw_batch_active(tsb)) { - spin_unlock(&sfw_data.fw_lock); - return; - } - } - - list_del_init(&sn->sn_list); - spin_unlock(&sfw_data.fw_lock); - - sfw_destroy_session(sn); - return; -} - -void -sfw_test_rpc_done (srpc_client_rpc_t *rpc) -{ - sfw_test_unit_t *tsu = rpc->crpc_priv; - sfw_test_instance_t *tsi = tsu->tsu_instance; - int done = 0; - - tsi->tsi_ops->tso_done_rpc(tsu, rpc); - - spin_lock(&tsi->tsi_lock); - - LASSERT (sfw_test_active(tsi)); - LASSERT (!list_empty(&rpc->crpc_list)); - - list_del_init(&rpc->crpc_list); - - /* batch is stopping or loop is done or get error */ - if (tsi->tsi_stopping || - tsu->tsu_loop == 0 || - (rpc->crpc_status != 0 && tsi->tsi_stoptsu_onerr)) - done = 1; - - /* dec ref for poster */ - srpc_client_rpc_decref(rpc); - - spin_unlock(&tsi->tsi_lock); - - if (!done) { - swi_schedule_workitem(&tsu->tsu_worker); - return; - } - - sfw_test_unit_done(tsu); - return; -} - -int -sfw_create_test_rpc (sfw_test_unit_t *tsu, lnet_process_id_t peer, - int nblk, int blklen, srpc_client_rpc_t **rpcpp) -{ - srpc_client_rpc_t *rpc = NULL; - sfw_test_instance_t *tsi = tsu->tsu_instance; - - spin_lock(&tsi->tsi_lock); - - LASSERT (sfw_test_active(tsi)); - - if (!list_empty(&tsi->tsi_free_rpcs)) { - /* pick request from buffer */ - rpc = list_entry(tsi->tsi_free_rpcs.next, - srpc_client_rpc_t, crpc_list); - LASSERT (nblk == rpc->crpc_bulk.bk_niov); - list_del_init(&rpc->crpc_list); - - srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk, - blklen, sfw_test_rpc_done, - sfw_test_rpc_fini, tsu); - } - - spin_unlock(&tsi->tsi_lock); - - if (rpc == NULL) - rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk, - blklen, sfw_test_rpc_done, - sfw_test_rpc_fini, tsu); - if (rpc == NULL) { - CERROR ("Can't create rpc for test %d\n", tsi->tsi_service); - return -ENOMEM; - } - - *rpcpp = rpc; - return 0; -} - -int -sfw_run_test (swi_workitem_t *wi) -{ - sfw_test_unit_t *tsu = 
wi->wi_data; - sfw_test_instance_t *tsi = tsu->tsu_instance; - srpc_client_rpc_t *rpc = NULL; - - LASSERT (wi == &tsu->tsu_worker); - - if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc) != 0) { - LASSERT (rpc == NULL); - goto test_done; - } - - LASSERT (rpc != NULL); - - spin_lock(&tsi->tsi_lock); - - if (tsi->tsi_stopping) { - list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs); - spin_unlock(&tsi->tsi_lock); - goto test_done; - } - - if (tsu->tsu_loop > 0) - tsu->tsu_loop--; - - list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs); - spin_unlock(&tsi->tsi_lock); - - rpc->crpc_timeout = SFW_TEST_RPC_TIMEOUT; - - spin_lock(&rpc->crpc_lock); - srpc_post_rpc(rpc); - spin_unlock(&rpc->crpc_lock); - return 0; - -test_done: - /* - * No one can schedule me now since: - * - previous RPC, if any, has done and - * - no new RPC is initiated. - * - my batch is still active; no one can run it again now. - * Cancel pending schedules and prevent future schedule attempts: - */ - swi_kill_workitem(wi); - sfw_test_unit_done(tsu); - return 1; -} - -int -sfw_run_batch (sfw_batch_t *tsb) -{ - swi_workitem_t *wi; - sfw_test_unit_t *tsu; - sfw_test_instance_t *tsi; - - if (sfw_batch_active(tsb)) { - CDEBUG (D_NET, "Can't start active batch: "LPU64" (%d)\n", - tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive)); - return -EPERM; - } - - list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) { - if (!tsi->tsi_is_client) /* skip server instances */ - continue; - - LASSERT (!tsi->tsi_stopping); - LASSERT (!sfw_test_active(tsi)); - - atomic_inc(&tsb->bat_nactive); - - list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) { - atomic_inc(&tsi->tsi_nactive); - tsu->tsu_loop = tsi->tsi_loop; - wi = &tsu->tsu_worker; - swi_init_workitem(wi, tsu, sfw_run_test); - swi_schedule_workitem(wi); - } - } - - return 0; -} - -int -sfw_stop_batch (sfw_batch_t *tsb, int force) -{ - sfw_test_instance_t *tsi; - srpc_client_rpc_t *rpc; - - if (!sfw_batch_active(tsb)) - return -EPERM; - - list_for_each_entry 
(tsi, &tsb->bat_tests, tsi_list) { - spin_lock(&tsi->tsi_lock); - - if (!tsi->tsi_is_client || - !sfw_test_active(tsi) || tsi->tsi_stopping) { - spin_unlock(&tsi->tsi_lock); - continue; - } - - tsi->tsi_stopping = 1; - - if (!force) { - spin_unlock(&tsi->tsi_lock); - continue; - } - - /* abort launched rpcs in the test */ - list_for_each_entry (rpc, &tsi->tsi_active_rpcs, crpc_list) { - spin_lock(&rpc->crpc_lock); - - srpc_abort_rpc(rpc, -EINTR); - - spin_unlock(&rpc->crpc_lock); - } - - spin_unlock(&tsi->tsi_lock); - } - - return 0; -} - -int -sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply) -{ - sfw_test_instance_t *tsi; - - if (testidx < 0) - return -EINVAL; - - if (testidx == 0) { - reply->bar_active = atomic_read(&tsb->bat_nactive); - return 0; - } - - list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) { - if (testidx-- > 1) - continue; - - reply->bar_active = atomic_read(&tsi->tsi_nactive); - return 0; - } - - return -ENOENT; -} - -void -sfw_free_pages (srpc_server_rpc_t *rpc) -{ - srpc_free_bulk(rpc->srpc_bulk); - rpc->srpc_bulk = NULL; -} - -int -sfw_alloc_pages (srpc_server_rpc_t *rpc, int npages, int sink) -{ - LASSERT (rpc->srpc_bulk == NULL); - LASSERT (npages > 0 && npages <= LNET_MAX_IOV); - - rpc->srpc_bulk = srpc_alloc_bulk(npages, sink); - if (rpc->srpc_bulk == NULL) return -ENOMEM; - - return 0; -} - -int -sfw_add_test (srpc_server_rpc_t *rpc) -{ - sfw_session_t *sn = sfw_data.fw_session; - srpc_test_reply_t *reply = &rpc->srpc_replymsg.msg_body.tes_reply; - srpc_test_reqst_t *request; - int rc; - sfw_batch_t *bat; - - request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst; - reply->tsr_sid = (sn == NULL) ? 
LST_INVALID_SID : sn->sn_id; - - if (request->tsr_loop == 0 || - request->tsr_concur == 0 || - request->tsr_sid.ses_nid == LNET_NID_ANY || - request->tsr_ndest > SFW_MAX_NDESTS || - (request->tsr_is_client && request->tsr_ndest == 0) || - request->tsr_concur > SFW_MAX_CONCUR || - request->tsr_service > SRPC_SERVICE_MAX_ID || - request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) { - reply->tsr_status = EINVAL; - return 0; - } - - if (sn == NULL || !sfw_sid_equal(request->tsr_sid, sn->sn_id) || - sfw_find_test_case(request->tsr_service) == NULL) { - reply->tsr_status = ENOENT; - return 0; - } - - bat = sfw_bid2batch(request->tsr_bid); - if (bat == NULL) { - CERROR ("Dropping RPC (%s) from %s under memory pressure.\n", - rpc->srpc_service->sv_name, - libcfs_id2str(rpc->srpc_peer)); - return -ENOMEM; - } - - if (sfw_batch_active(bat)) { - reply->tsr_status = EBUSY; - return 0; - } - - if (request->tsr_is_client && rpc->srpc_bulk == NULL) { - /* rpc will be resumed later in sfw_bulk_ready */ - return sfw_alloc_pages(rpc, - sfw_id_pages(request->tsr_ndest), 1); - } - - rc = sfw_add_test_instance(bat, rpc); - CDEBUG (rc == 0 ? D_NET : D_WARNING, - "%s test: sv %d %s, loop %d, concur %d, ndest %d\n", - rc == 0 ? "Added" : "Failed to add", request->tsr_service, - request->tsr_is_client ? "client" : "server", - request->tsr_loop, request->tsr_concur, request->tsr_ndest); - - reply->tsr_status = (rc < 0) ? -rc : rc; - return 0; -} - -int -sfw_control_batch (srpc_batch_reqst_t *request, srpc_batch_reply_t *reply) -{ - sfw_session_t *sn = sfw_data.fw_session; - int rc = 0; - sfw_batch_t *bat; - - reply->bar_sid = (sn == NULL) ? 
LST_INVALID_SID : sn->sn_id; - - if (sn == NULL || !sfw_sid_equal(request->bar_sid, sn->sn_id)) { - reply->bar_status = ESRCH; - return 0; - } - - bat = sfw_find_batch(request->bar_bid); - if (bat == NULL) { - reply->bar_status = ENOENT; - return 0; - } - - switch (request->bar_opc) { - case SRPC_BATCH_OPC_RUN: - rc = sfw_run_batch(bat); - break; - - case SRPC_BATCH_OPC_STOP: - rc = sfw_stop_batch(bat, request->bar_arg); - break; - - case SRPC_BATCH_OPC_QUERY: - rc = sfw_query_batch(bat, request->bar_testidx, reply); - break; - - default: - return -EINVAL; /* drop it */ - } - - reply->bar_status = (rc < 0) ? -rc : rc; - return 0; -} - -int -sfw_handle_server_rpc (srpc_server_rpc_t *rpc) -{ - srpc_service_t *sv = rpc->srpc_service; - srpc_msg_t *reply = &rpc->srpc_replymsg; - srpc_msg_t *request = &rpc->srpc_reqstbuf->buf_msg; - int rc = 0; - - LASSERT (sfw_data.fw_active_srpc == NULL); - LASSERT (sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - spin_lock(&sfw_data.fw_lock); - - if (sfw_data.fw_shuttingdown) { - spin_unlock(&sfw_data.fw_lock); - return -ESHUTDOWN; - } - - /* Remove timer to avoid racing with it or expiring active session */ - if (sfw_del_session_timer() != 0) { - CERROR ("Dropping RPC (%s) from %s: racing with expiry timer.", - sv->sv_name, libcfs_id2str(rpc->srpc_peer)); - spin_unlock(&sfw_data.fw_lock); - return -EAGAIN; - } - - sfw_data.fw_active_srpc = rpc; - spin_unlock(&sfw_data.fw_lock); - - sfw_unpack_message(request); - LASSERT (request->msg_type == srpc_service2request(sv->sv_id)); - - switch(sv->sv_id) { - default: - LBUG (); - case SRPC_SERVICE_TEST: - rc = sfw_add_test(rpc); - break; - - case SRPC_SERVICE_BATCH: - rc = sfw_control_batch(&request->msg_body.bat_reqst, - &reply->msg_body.bat_reply); - break; - - case SRPC_SERVICE_QUERY_STAT: - rc = sfw_get_stats(&request->msg_body.stat_reqst, - &reply->msg_body.stat_reply); - break; - - case SRPC_SERVICE_DEBUG: - rc = sfw_debug_session(&request->msg_body.dbg_reqst, - 
&reply->msg_body.dbg_reply); - break; - - case SRPC_SERVICE_MAKE_SESSION: - rc = sfw_make_session(&request->msg_body.mksn_reqst, - &reply->msg_body.mksn_reply); - break; - - case SRPC_SERVICE_REMOVE_SESSION: - rc = sfw_remove_session(&request->msg_body.rmsn_reqst, - &reply->msg_body.rmsn_reply); - break; - } - - rpc->srpc_done = sfw_server_rpc_done; - spin_lock(&sfw_data.fw_lock); - -#ifdef __KERNEL__ - if (!sfw_data.fw_shuttingdown) - sfw_add_session_timer(); -#else - LASSERT (!sfw_data.fw_shuttingdown); - sfw_add_session_timer(); -#endif - - sfw_data.fw_active_srpc = NULL; - spin_unlock(&sfw_data.fw_lock); - return rc; -} - -int -sfw_bulk_ready (srpc_server_rpc_t *rpc, int status) -{ - srpc_service_t *sv = rpc->srpc_service; - int rc; - - LASSERT (rpc->srpc_bulk != NULL); - LASSERT (sv->sv_id == SRPC_SERVICE_TEST); - LASSERT (sfw_data.fw_active_srpc == NULL); - LASSERT (rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client); - - spin_lock(&sfw_data.fw_lock); - - if (status != 0) { - CERROR ("Bulk transfer failed for RPC: " - "service %s, peer %s, status %d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), status); - spin_unlock(&sfw_data.fw_lock); - return -EIO; - } - - if (sfw_data.fw_shuttingdown) { - spin_unlock(&sfw_data.fw_lock); - return -ESHUTDOWN; - } - - if (sfw_del_session_timer() != 0) { - CERROR ("Dropping RPC (%s) from %s: racing with expiry timer", - sv->sv_name, libcfs_id2str(rpc->srpc_peer)); - spin_unlock(&sfw_data.fw_lock); - return -EAGAIN; - } - - sfw_data.fw_active_srpc = rpc; - spin_unlock(&sfw_data.fw_lock); - - rc = sfw_add_test(rpc); - - spin_lock(&sfw_data.fw_lock); - -#ifdef __KERNEL__ - if (!sfw_data.fw_shuttingdown) - sfw_add_session_timer(); -#else - LASSERT (!sfw_data.fw_shuttingdown); - sfw_add_session_timer(); -#endif - - sfw_data.fw_active_srpc = NULL; - spin_unlock(&sfw_data.fw_lock); - return rc; -} - -srpc_client_rpc_t * -sfw_create_rpc (lnet_process_id_t peer, int service, - int nbulkiov, int bulklen, - void (*done) 
(srpc_client_rpc_t *), void *priv) -{ - srpc_client_rpc_t *rpc; - - spin_lock(&sfw_data.fw_lock); - - LASSERT (!sfw_data.fw_shuttingdown); - LASSERT (service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - if (nbulkiov == 0 && !list_empty(&sfw_data.fw_zombie_rpcs)) { - rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - srpc_client_rpc_t, crpc_list); - list_del(&rpc->crpc_list); - spin_unlock(&sfw_data.fw_lock); - - srpc_init_client_rpc(rpc, peer, service, 0, 0, - done, sfw_client_rpc_fini, priv); - return rpc; - } - - spin_unlock(&sfw_data.fw_lock); - - rpc = srpc_create_client_rpc(peer, service, nbulkiov, bulklen, done, - nbulkiov != 0 ? NULL : sfw_client_rpc_fini, - priv); - return rpc; -} - -void -sfw_unpack_message (srpc_msg_t *msg) -{ - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - __swab32s(&msg->msg_type); - - if (msg->msg_type == SRPC_MSG_STAT_REQST) { - srpc_stat_reqst_t *req = &msg->msg_body.stat_reqst; - - __swab32s(&req->str_type); - __swab64s(&req->str_rpyid); - sfw_unpack_sid(req->str_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_STAT_REPLY) { - srpc_stat_reply_t *rep = &msg->msg_body.stat_reply; - - __swab32s(&rep->str_status); - sfw_unpack_sid(rep->str_sid); - sfw_unpack_fw_counters(rep->str_fw); - sfw_unpack_rpc_counters(rep->str_rpc); - sfw_unpack_lnet_counters(rep->str_lnet); - return; - } - - if (msg->msg_type == SRPC_MSG_MKSN_REQST) { - srpc_mksn_reqst_t *req = &msg->msg_body.mksn_reqst; - - __swab64s(&req->mksn_rpyid); - __swab32s(&req->mksn_force); - sfw_unpack_sid(req->mksn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_MKSN_REPLY) { - srpc_mksn_reply_t *rep = &msg->msg_body.mksn_reply; - - __swab32s(&rep->mksn_status); - __swab32s(&rep->mksn_timeout); - sfw_unpack_sid(rep->mksn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_RMSN_REQST) { - srpc_rmsn_reqst_t *req = &msg->msg_body.rmsn_reqst; - - __swab64s(&req->rmsn_rpyid); - 
sfw_unpack_sid(req->rmsn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_RMSN_REPLY) { - srpc_rmsn_reply_t *rep = &msg->msg_body.rmsn_reply; - - __swab32s(&rep->rmsn_status); - sfw_unpack_sid(rep->rmsn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_DEBUG_REQST) { - srpc_debug_reqst_t *req = &msg->msg_body.dbg_reqst; - - __swab64s(&req->dbg_rpyid); - __swab32s(&req->dbg_flags); - sfw_unpack_sid(req->dbg_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) { - srpc_debug_reply_t *rep = &msg->msg_body.dbg_reply; - - __swab32s(&rep->dbg_nbatch); - __swab32s(&rep->dbg_timeout); - sfw_unpack_sid(rep->dbg_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_BATCH_REQST) { - srpc_batch_reqst_t *req = &msg->msg_body.bat_reqst; - - __swab32s(&req->bar_opc); - __swab64s(&req->bar_rpyid); - __swab32s(&req->bar_testidx); - __swab32s(&req->bar_arg); - sfw_unpack_sid(req->bar_sid); - __swab64s(&req->bar_bid.bat_id); - return; - } - - if (msg->msg_type == SRPC_MSG_BATCH_REPLY) { - srpc_batch_reply_t *rep = &msg->msg_body.bat_reply; - - __swab32s(&rep->bar_status); - sfw_unpack_sid(rep->bar_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_TEST_REQST) { - srpc_test_reqst_t *req = &msg->msg_body.tes_reqst; - - __swab64s(&req->tsr_rpyid); - __swab64s(&req->tsr_bulkid); - __swab32s(&req->tsr_loop); - __swab32s(&req->tsr_ndest); - __swab32s(&req->tsr_concur); - __swab32s(&req->tsr_service); - sfw_unpack_sid(req->tsr_sid); - __swab64s(&req->tsr_bid.bat_id); - return; - } - - if (msg->msg_type == SRPC_MSG_TEST_REPLY) { - srpc_test_reply_t *rep = &msg->msg_body.tes_reply; - - __swab32s(&rep->tsr_status); - sfw_unpack_sid(rep->tsr_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_JOIN_REQST) { - srpc_join_reqst_t *req = &msg->msg_body.join_reqst; - - __swab64s(&req->join_rpyid); - sfw_unpack_sid(req->join_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_JOIN_REPLY) { - srpc_join_reply_t *rep = &msg->msg_body.join_reply; - - 
__swab32s(&rep->join_status); - __swab32s(&rep->join_timeout); - sfw_unpack_sid(rep->join_sid); - return; - } - - LBUG (); - return; -} - -void -sfw_abort_rpc (srpc_client_rpc_t *rpc) -{ - LASSERT (atomic_read(&rpc->crpc_refcount) > 0); - LASSERT (rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - spin_lock(&rpc->crpc_lock); - srpc_abort_rpc(rpc, -EINTR); - spin_unlock(&rpc->crpc_lock); - return; -} - -void -sfw_post_rpc (srpc_client_rpc_t *rpc) -{ - spin_lock(&rpc->crpc_lock); - - LASSERT (!rpc->crpc_closed); - LASSERT (!rpc->crpc_aborted); - LASSERT (list_empty(&rpc->crpc_list)); - LASSERT (!sfw_data.fw_shuttingdown); - - rpc->crpc_timeout = SFW_CLIENT_RPC_TIMEOUT; - srpc_post_rpc(rpc); - - spin_unlock(&rpc->crpc_lock); - return; -} - -static srpc_service_t sfw_services[] = -{ - { - .sv_name = "debug", - .sv_id = SRPC_SERVICE_DEBUG, - }, - { - .sv_name = "query stats", - .sv_id = SRPC_SERVICE_QUERY_STAT, - }, - { - .sv_name = "make sessin", - .sv_id = SRPC_SERVICE_MAKE_SESSION, - }, - { - .sv_name = "remove session", - .sv_id = SRPC_SERVICE_REMOVE_SESSION, - }, - { - .sv_name = "batch service", - .sv_id = SRPC_SERVICE_BATCH, - }, - { - .sv_name = "test service", - .sv_id = SRPC_SERVICE_TEST, - }, - { .sv_name = NULL, } -}; - -extern sfw_test_client_ops_t ping_test_client; -extern srpc_service_t ping_test_service; - -extern sfw_test_client_ops_t brw_test_client; -extern srpc_service_t brw_test_service; - -int -sfw_startup (void) -{ - int i; - int rc; - int error; - srpc_service_t *sv; - sfw_test_case_t *tsc; - -#ifndef __KERNEL__ - char *s; - - s = getenv("SESSION_TIMEOUT"); - session_timeout = s != NULL ? atoi(s) : session_timeout; - - s = getenv("BRW_INJECT_ERRORS"); - brw_inject_errors = s != NULL ? 
atoi(s) : brw_inject_errors; -#endif - - if (session_timeout < 0) { - CERROR ("Session timeout must be non-negative: %d\n", - session_timeout); - return -EINVAL; - } - - if (session_timeout == 0) - CWARN ("Zero session_timeout specified " - "- test sessions never expire.\n"); - - memset(&sfw_data, 0, sizeof(struct smoketest_framework)); - - sfw_data.fw_session = NULL; - sfw_data.fw_active_srpc = NULL; - spin_lock_init(&sfw_data.fw_lock); - atomic_set(&sfw_data.fw_nzombies, 0); - CFS_INIT_LIST_HEAD(&sfw_data.fw_tests); - CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs); - CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions); - - rc = sfw_register_test(&brw_test_service, &brw_test_client); - LASSERT (rc == 0); - rc = sfw_register_test(&ping_test_service, &ping_test_client); - LASSERT (rc == 0); - - error = 0; - list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) { - sv = tsc->tsc_srv_service; - sv->sv_concur = SFW_TEST_CONCURRENCY; - - rc = srpc_add_service(sv); - LASSERT (rc != -EBUSY); - if (rc != 0) { - CWARN ("Failed to add %s service: %d\n", - sv->sv_name, rc); - error = rc; - } - } - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (sv->sv_name == NULL) break; - - sv->sv_bulk_ready = NULL; - sv->sv_handler = sfw_handle_server_rpc; - sv->sv_concur = SFW_SERVICE_CONCURRENCY; - if (sv->sv_id == SRPC_SERVICE_TEST) - sv->sv_bulk_ready = sfw_bulk_ready; - - rc = srpc_add_service(sv); - LASSERT (rc != -EBUSY); - if (rc != 0) { - CWARN ("Failed to add %s service: %d\n", - sv->sv_name, rc); - error = rc; - } - - /* about to sfw_shutdown, no need to add buffer */ - if (error) continue; - - rc = srpc_service_add_buffers(sv, SFW_POST_BUFFERS); - if (rc != SFW_POST_BUFFERS) { - CWARN ("Failed to reserve enough buffers: " - "service %s, %d needed, %d reserved\n", - sv->sv_name, SFW_POST_BUFFERS, rc); - error = -ENOMEM; - } - } - - if (error != 0) - sfw_shutdown(); - return error; -} - -void -sfw_shutdown (void) -{ - srpc_service_t *sv; - sfw_test_case_t *tsc; - int i; - - 
spin_lock(&sfw_data.fw_lock); - - sfw_data.fw_shuttingdown = 1; -#ifdef __KERNEL__ - lst_wait_until(sfw_data.fw_active_srpc == NULL, sfw_data.fw_lock, - "waiting for active RPC to finish.\n"); -#else - LASSERT (sfw_data.fw_active_srpc == NULL); -#endif - - if (sfw_del_session_timer() != 0) - lst_wait_until(sfw_data.fw_session == NULL, sfw_data.fw_lock, - "waiting for session timer to explode.\n"); - - sfw_deactivate_session(); - lst_wait_until(atomic_read(&sfw_data.fw_nzombies) == 0, - sfw_data.fw_lock, - "waiting for %d zombie sessions to die.\n", - atomic_read(&sfw_data.fw_nzombies)); - - spin_unlock(&sfw_data.fw_lock); - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (sv->sv_name == NULL) - break; - - srpc_shutdown_service(sv); - srpc_remove_service(sv); - } - - list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) { - sv = tsc->tsc_srv_service; - srpc_shutdown_service(sv); - srpc_remove_service(sv); - } - - while (!list_empty(&sfw_data.fw_zombie_rpcs)) { - srpc_client_rpc_t *rpc; - - rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - srpc_client_rpc_t, crpc_list); - list_del(&rpc->crpc_list); - - LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); - } - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (sv->sv_name == NULL) - break; - - srpc_wait_service_shutdown(sv); - } - - while (!list_empty(&sfw_data.fw_tests)) { - tsc = list_entry(sfw_data.fw_tests.next, - sfw_test_case_t, tsc_list); - - srpc_wait_service_shutdown(tsc->tsc_srv_service); - - list_del(&tsc->tsc_list); - LIBCFS_FREE(tsc, sizeof(*tsc)); - } - - return; -} diff --git a/lnet/selftest/module.c b/lnet/selftest/module.c deleted file mode 100644 index 5986acd36926092ff210c7ec4781ff96cce0a922..0000000000000000000000000000000000000000 --- a/lnet/selftest/module.c +++ /dev/null @@ -1,106 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - - -#define LST_INIT_NONE 0 -#define 
LST_INIT_RPC 1 -#define LST_INIT_FW 2 -#define LST_INIT_CONSOLE 3 - -extern int lstcon_console_init(void); -extern int lstcon_console_fini(void); - -static int lst_init_step = LST_INIT_NONE; - -void -lnet_selftest_fini (void) -{ - switch (lst_init_step) { -#ifdef __KERNEL__ - case LST_INIT_CONSOLE: - lstcon_console_fini(); -#endif - case LST_INIT_FW: - sfw_shutdown(); - case LST_INIT_RPC: - srpc_shutdown(); - case LST_INIT_NONE: - break; - default: - LBUG(); - } - return; -} - -int -lnet_selftest_init (void) -{ - int rc; - - rc = srpc_startup(); - if (rc != 0) { - CERROR("LST can't startup rpc\n"); - goto error; - } - lst_init_step = LST_INIT_RPC; - - rc = sfw_startup(); - if (rc != 0) { - CERROR("LST can't startup framework\n"); - goto error; - } - lst_init_step = LST_INIT_FW; - -#ifdef __KERNEL__ - rc = lstcon_console_init(); - if (rc != 0) { - CERROR("LST can't startup console\n"); - goto error; - } - lst_init_step = LST_INIT_CONSOLE; -#endif - - return 0; -error: - lnet_selftest_fini(); - return rc; -} - -#ifdef __KERNEL__ - -MODULE_DESCRIPTION("LNet Selftest"); -MODULE_LICENSE("GPL"); - -cfs_module(lnet, "0.9.0", lnet_selftest_init, lnet_selftest_fini); - -#else - -int -selftest_wait_events (void) -{ - int evts = 0; - - for (;;) { - /* Consume all pending events */ - while (srpc_check_event(0)) - evts++; - evts += stt_check_events(); - evts += swi_check_events(); - if (evts != 0) break; - - /* Nothing happened, block for events */ - evts += srpc_check_event(stt_poll_interval()); - /* We may have blocked, check for expired timers */ - evts += stt_check_events(); - if (evts == 0) /* timed out and still no event */ - break; - } - - return evts; -} - -#endif diff --git a/lnet/selftest/ping_test.c b/lnet/selftest/ping_test.c deleted file mode 100644 index 39a67c7742a86915a50c6ea28882ff1513db33c9..0000000000000000000000000000000000000000 --- a/lnet/selftest/ping_test.c +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * 
vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * Test client & Server - */ - -#include "selftest.h" - -#define LST_PING_TEST_MAGIC 0xbabeface - -typedef struct { - spinlock_t pnd_lock; /* serialize */ - int pnd_counter; /* sequence counter */ -} lst_ping_data_t; - -static lst_ping_data_t lst_ping_data; - -static int -ping_client_init(sfw_test_instance_t *tsi) -{ - LASSERT (tsi->tsi_is_client); - - spin_lock_init(&lst_ping_data.pnd_lock); - lst_ping_data.pnd_counter = 0; - - return 0; -} - -static void -ping_client_fini (sfw_test_instance_t *tsi) -{ - sfw_session_t *sn = tsi->tsi_batch->bat_session; - int errors; - - LASSERT (sn != NULL); - LASSERT (tsi->tsi_is_client); - - errors = atomic_read(&sn->sn_ping_errors); - if (errors) - CWARN ("%d pings have failed.\n", errors); - else - CDEBUG (D_NET, "Ping test finished OK.\n"); -} - -static int -ping_client_prep_rpc(sfw_test_unit_t *tsu, - lnet_process_id_t dest, srpc_client_rpc_t **rpc) -{ - srpc_ping_reqst_t *req; - struct timeval tv; - int rc; - - rc = sfw_create_test_rpc(tsu, dest, 0, 0, rpc); - if (rc != 0) - return rc; - - req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst; - - req->pnr_magic = LST_PING_TEST_MAGIC; - - spin_lock(&lst_ping_data.pnd_lock); - req->pnr_seq = lst_ping_data.pnd_counter ++; - spin_unlock(&lst_ping_data.pnd_lock); - - cfs_fs_timeval(&tv); - req->pnr_time_sec = tv.tv_sec; - req->pnr_time_usec = tv.tv_usec; - - return rc; -} - -static void -ping_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) -{ - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_session_t *sn = tsi->tsi_batch->bat_session; - srpc_ping_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst; - srpc_ping_reply_t *reply = &rpc->crpc_replymsg.msg_body.ping_reply; - struct timeval tv; - - LASSERT (sn != NULL); - - if (rpc->crpc_status != 0) { - if (!tsi->tsi_stopping) /* rpc could have been aborted */ - 
atomic_inc(&sn->sn_ping_errors); - CERROR ("Unable to ping %s (%d): %d\n", - libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq, rpc->crpc_status); - return; - } - - if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) { - __swab32s(&reply->pnr_seq); - __swab32s(&reply->pnr_magic); - __swab32s(&reply->pnr_status); - } - - if (reply->pnr_magic != LST_PING_TEST_MAGIC) { - rpc->crpc_status = -EBADMSG; - atomic_inc(&sn->sn_ping_errors); - CERROR ("Bad magic %u from %s, %u expected.\n", - reply->pnr_magic, libcfs_id2str(rpc->crpc_dest), - LST_PING_TEST_MAGIC); - return; - } - - if (reply->pnr_seq != reqst->pnr_seq) { - rpc->crpc_status = -EBADMSG; - atomic_inc(&sn->sn_ping_errors); - CERROR ("Bad seq %u from %s, %u expected.\n", - reply->pnr_seq, libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq); - return; - } - - cfs_fs_timeval(&tv); - CDEBUG (D_NET, "%d reply in %u usec\n", reply->pnr_seq, - (unsigned)((tv.tv_sec - (unsigned)reqst->pnr_time_sec) * 1000000 - + (tv.tv_usec - reqst->pnr_time_usec))); - return; -} - -static int -ping_server_handle (srpc_server_rpc_t *rpc) -{ - srpc_service_t *sv = rpc->srpc_service; - srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst; - srpc_ping_reply_t *rep = &rpc->srpc_replymsg.msg_body.ping_reply; - - LASSERT (sv->sv_id == SRPC_SERVICE_PING); - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) { - LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - __swab32s(&reqstmsg->msg_type); - __swab32s(&req->pnr_seq); - __swab32s(&req->pnr_magic); - __swab64s(&req->pnr_time_sec); - __swab64s(&req->pnr_time_usec); - } - LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id)); - - if (req->pnr_magic != LST_PING_TEST_MAGIC) { - CERROR ("Unexpect magic %08x from %s\n", - req->pnr_magic, libcfs_id2str(rpc->srpc_peer)); - return -EINVAL; - } - - rep->pnr_seq = req->pnr_seq; - rep->pnr_magic = LST_PING_TEST_MAGIC; - - CDEBUG (D_NET, "Get ping %d from %s\n", - req->pnr_seq, 
libcfs_id2str(rpc->srpc_peer)); - return 0; -} - -sfw_test_client_ops_t ping_test_client = -{ - .tso_init = ping_client_init, - .tso_fini = ping_client_fini, - .tso_prep_rpc = ping_client_prep_rpc, - .tso_done_rpc = ping_client_done_rpc, -}; - -srpc_service_t ping_test_service = -{ - .sv_name = "ping test", - .sv_handler = ping_server_handle, - .sv_id = SRPC_SERVICE_PING, -}; diff --git a/lnet/selftest/rpc.c b/lnet/selftest/rpc.c deleted file mode 100644 index ac5a8fe59cfd73a00fe3730550c9e72e79fb35a5..0000000000000000000000000000000000000000 --- a/lnet/selftest/rpc.c +++ /dev/null @@ -1,1714 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Isaac Huang <isaac@clusterfs.com> - * - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - - -#define SRPC_PEER_HASH_SIZE 101 /* # peer lists */ - -typedef enum { - SRPC_STATE_NONE, - SRPC_STATE_NI_INIT, - SRPC_STATE_EQ_INIT, - SRPC_STATE_WI_INIT, - SRPC_STATE_RUNNING, - SRPC_STATE_STOPPING, -} srpc_state_t; - -struct smoketest_rpc { - spinlock_t rpc_glock; /* global lock */ - srpc_service_t *rpc_services[SRPC_SERVICE_MAX_ID + 1]; - struct list_head *rpc_peers; /* hash table of known peers */ - lnet_handle_eq_t rpc_lnet_eq; /* _the_ LNet event queue */ - srpc_state_t rpc_state; - srpc_counters_t rpc_counters; - __u64 rpc_matchbits; /* matchbits counter */ -} srpc_data; - -static int srpc_peer_credits = 16; -CFS_MODULE_PARM(srpc_peer_credits, "i", int, 0444, - "# in-flight RPCs per peer (16 by default)"); - -/* forward ref's */ -int srpc_handle_rpc (swi_workitem_t *wi); - -void srpc_get_counters (srpc_counters_t *cnt) -{ - spin_lock(&srpc_data.rpc_glock); - *cnt = srpc_data.rpc_counters; - spin_unlock(&srpc_data.rpc_glock); -} - -void srpc_set_counters (const srpc_counters_t *cnt) -{ - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters = *cnt; - 
spin_unlock(&srpc_data.rpc_glock); -} - -void -srpc_add_bulk_page (srpc_bulk_t *bk, cfs_page_t *pg, int i) -{ - LASSERT (i >= 0 && i < bk->bk_niov); - -#ifdef __KERNEL__ - bk->bk_iovs[i].kiov_offset = 0; - bk->bk_iovs[i].kiov_page = pg; - bk->bk_iovs[i].kiov_len = CFS_PAGE_SIZE; -#else - LASSERT (bk->bk_pages != NULL); - - bk->bk_pages[i] = pg; - bk->bk_iovs[i].iov_len = CFS_PAGE_SIZE; - bk->bk_iovs[i].iov_base = cfs_page_address(pg); -#endif - return; -} - -void -srpc_free_bulk (srpc_bulk_t *bk) -{ - int i; - cfs_page_t *pg; - - LASSERT (bk != NULL); -#ifndef __KERNEL__ - LASSERT (bk->bk_pages != NULL); -#endif - - for (i = 0; i < bk->bk_niov; i++) { -#ifdef __KERNEL__ - pg = bk->bk_iovs[i].kiov_page; -#else - pg = bk->bk_pages[i]; -#endif - if (pg == NULL) break; - - cfs_free_page(pg); - } - -#ifndef __KERNEL__ - LIBCFS_FREE(bk->bk_pages, sizeof(cfs_page_t *) * bk->bk_niov); -#endif - LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[bk->bk_niov])); - return; -} - -srpc_bulk_t * -srpc_alloc_bulk (int npages, int sink) -{ - srpc_bulk_t *bk; - cfs_page_t **pages; - int i; - - LASSERT (npages > 0 && npages <= LNET_MAX_IOV); - - LIBCFS_ALLOC(bk, offsetof(srpc_bulk_t, bk_iovs[npages])); - if (bk == NULL) { - CERROR ("Can't allocate descriptor for %d pages\n", npages); - return NULL; - } - - memset(bk, 0, offsetof(srpc_bulk_t, bk_iovs[npages])); - bk->bk_sink = sink; - bk->bk_niov = npages; - bk->bk_len = npages * CFS_PAGE_SIZE; -#ifndef __KERNEL__ - LIBCFS_ALLOC(pages, sizeof(cfs_page_t *) * npages); - if (pages == NULL) { - LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[npages])); - CERROR ("Can't allocate page array for %d pages\n", npages); - return NULL; - } - - memset(pages, 0, sizeof(cfs_page_t *) * npages); - bk->bk_pages = pages; -#else - UNUSED (pages); -#endif - - for (i = 0; i < npages; i++) { - cfs_page_t *pg = cfs_alloc_page(CFS_ALLOC_STD); - - if (pg == NULL) { - CERROR ("Can't allocate page %d of %d\n", i, npages); - srpc_free_bulk(bk); - return NULL; - } - 
- srpc_add_bulk_page(bk, pg, i); - } - - return bk; -} - - -static inline struct list_head * -srpc_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % SRPC_PEER_HASH_SIZE; - - return &srpc_data.rpc_peers[hash]; -} - -static inline srpc_peer_t * -srpc_create_peer (lnet_nid_t nid) -{ - srpc_peer_t *peer; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, sizeof(srpc_peer_t)); - if (peer == NULL) { - CERROR ("Failed to allocate peer structure for %s\n", - libcfs_nid2str(nid)); - return NULL; - } - - memset(peer, 0, sizeof(srpc_peer_t)); - peer->stp_nid = nid; - peer->stp_credits = srpc_peer_credits; - - spin_lock_init(&peer->stp_lock); - CFS_INIT_LIST_HEAD(&peer->stp_rpcq); - CFS_INIT_LIST_HEAD(&peer->stp_ctl_rpcq); - return peer; -} - -srpc_peer_t * -srpc_find_peer_locked (lnet_nid_t nid) -{ - struct list_head *peer_list = srpc_nid2peerlist(nid); - srpc_peer_t *peer; - - LASSERT (nid != LNET_NID_ANY); - - list_for_each_entry (peer, peer_list, stp_list) { - if (peer->stp_nid == nid) - return peer; - } - - return NULL; -} - -static srpc_peer_t * -srpc_nid2peer (lnet_nid_t nid) -{ - srpc_peer_t *peer; - srpc_peer_t *new_peer; - - spin_lock(&srpc_data.rpc_glock); - peer = srpc_find_peer_locked(nid); - spin_unlock(&srpc_data.rpc_glock); - - if (peer != NULL) - return peer; - - new_peer = srpc_create_peer(nid); - - spin_lock(&srpc_data.rpc_glock); - - peer = srpc_find_peer_locked(nid); - if (peer != NULL) { - spin_unlock(&srpc_data.rpc_glock); - if (new_peer != NULL) - LIBCFS_FREE(new_peer, sizeof(srpc_peer_t)); - - return peer; - } - - if (new_peer == NULL) { - spin_unlock(&srpc_data.rpc_glock); - return NULL; - } - - list_add_tail(&new_peer->stp_list, srpc_nid2peerlist(nid)); - spin_unlock(&srpc_data.rpc_glock); - return new_peer; -} - -static inline __u64 -srpc_next_id (void) -{ - __u64 id; - - spin_lock(&srpc_data.rpc_glock); - id = srpc_data.rpc_matchbits++; - spin_unlock(&srpc_data.rpc_glock); - return id; -} - -void 
-srpc_init_server_rpc (srpc_server_rpc_t *rpc, - srpc_service_t *sv, srpc_buffer_t *buffer) -{ - memset(rpc, 0, sizeof(*rpc)); - swi_init_workitem(&rpc->srpc_wi, rpc, srpc_handle_rpc); - - rpc->srpc_ev.ev_fired = 1; /* no event expected now */ - - rpc->srpc_service = sv; - rpc->srpc_reqstbuf = buffer; - rpc->srpc_peer = buffer->buf_peer; - rpc->srpc_self = buffer->buf_self; - rpc->srpc_replymdh = LNET_INVALID_HANDLE; -} - -int -srpc_add_service (srpc_service_t *sv) -{ - int id = sv->sv_id; - int i; - srpc_server_rpc_t *rpc; - - LASSERT (sv->sv_concur > 0); - LASSERT (0 <= id && id <= SRPC_SERVICE_MAX_ID); - - spin_lock(&srpc_data.rpc_glock); - - LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING); - - if (srpc_data.rpc_services[id] != NULL) { - spin_unlock(&srpc_data.rpc_glock); - return -EBUSY; - } - - srpc_data.rpc_services[id] = sv; - spin_unlock(&srpc_data.rpc_glock); - - sv->sv_nprune = 0; - sv->sv_nposted_msg = 0; - sv->sv_shuttingdown = 0; - spin_lock_init(&sv->sv_lock); - CFS_INIT_LIST_HEAD(&sv->sv_free_rpcq); - CFS_INIT_LIST_HEAD(&sv->sv_active_rpcq); - CFS_INIT_LIST_HEAD(&sv->sv_posted_msgq); - CFS_INIT_LIST_HEAD(&sv->sv_blocked_msgq); - - sv->sv_ev.ev_data = sv; - sv->sv_ev.ev_type = SRPC_REQUEST_RCVD; - - for (i = 0; i < sv->sv_concur; i++) { - LIBCFS_ALLOC(rpc, sizeof(*rpc)); - if (rpc == NULL) goto enomem; - - list_add(&rpc->srpc_list, &sv->sv_free_rpcq); - } - - CDEBUG (D_NET, "Adding service: id %d, name %s, concurrency %d\n", - id, sv->sv_name, sv->sv_concur); - return 0; - -enomem: - while (!list_empty(&sv->sv_free_rpcq)) { - rpc = list_entry(sv->sv_free_rpcq.next, - srpc_server_rpc_t, srpc_list); - list_del(&rpc->srpc_list); - LIBCFS_FREE(rpc, sizeof(*rpc)); - } - - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_services[id] = NULL; - spin_unlock(&srpc_data.rpc_glock); - return -ENOMEM; -} - -int -srpc_remove_service (srpc_service_t *sv) -{ - int id = sv->sv_id; - - spin_lock(&srpc_data.rpc_glock); - - if (srpc_data.rpc_services[id] != sv) { - 
spin_unlock(&srpc_data.rpc_glock); - return -ENOENT; - } - - srpc_data.rpc_services[id] = NULL; - spin_unlock(&srpc_data.rpc_glock); - return 0; -} - -int -srpc_post_passive_rdma(int portal, __u64 matchbits, void *buf, - int len, int options, lnet_process_id_t peer, - lnet_handle_md_t *mdh, srpc_event_t *ev) -{ - int rc; - lnet_md_t md; - lnet_handle_me_t meh; - - rc = LNetMEAttach(portal, peer, matchbits, 0, - LNET_UNLINK, LNET_INS_AFTER, &meh); - if (rc != 0) { - CERROR ("LNetMEAttach failed: %d\n", rc); - LASSERT (rc == -ENOMEM); - return -ENOMEM; - } - - md.threshold = 1; - md.user_ptr = ev; - md.start = buf; - md.length = len; - md.options = options; - md.eq_handle = srpc_data.rpc_lnet_eq; - - rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh); - if (rc != 0) { - CERROR ("LNetMDAttach failed: %d\n", rc); - LASSERT (rc == -ENOMEM); - - rc = LNetMEUnlink(meh); - LASSERT (rc == 0); - return -ENOMEM; - } - - CDEBUG (D_NET, - "Posted passive RDMA: peer %s, portal %d, matchbits "LPX64"\n", - libcfs_id2str(peer), portal, matchbits); - return 0; -} - -int -srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, - int options, lnet_process_id_t peer, lnet_nid_t self, - lnet_handle_md_t *mdh, srpc_event_t *ev) -{ - int rc; - lnet_md_t md; - - md.user_ptr = ev; - md.start = buf; - md.length = len; - md.eq_handle = srpc_data.rpc_lnet_eq; - md.threshold = ((options & LNET_MD_OP_GET) != 0) ? 2 : 1; - md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET); - - rc = LNetMDBind(md, LNET_UNLINK, mdh); - if (rc != 0) { - CERROR ("LNetMDBind failed: %d\n", rc); - LASSERT (rc == -ENOMEM); - return -ENOMEM; - } - - /* this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options. - * they're only meaningful for MDs attached to an ME (i.e. passive - * buffers... 
*/ - if ((options & LNET_MD_OP_PUT) != 0) { - rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer, - portal, matchbits, 0, 0); - } else { - LASSERT ((options & LNET_MD_OP_GET) != 0); - - rc = LNetGet(self, *mdh, peer, portal, matchbits, 0); - } - - if (rc != 0) { - CERROR ("LNet%s(%s, %d, "LPD64") failed: %d\n", - ((options & LNET_MD_OP_PUT) != 0) ? "Put" : "Get", - libcfs_id2str(peer), portal, matchbits, rc); - - /* The forthcoming unlink event will complete this operation - * with failure, so fall through and return success here. - */ - rc = LNetMDUnlink(*mdh); - LASSERT (rc == 0); - } else { - CDEBUG (D_NET, - "Posted active RDMA: peer %s, portal %u, matchbits "LPX64"\n", - libcfs_id2str(peer), portal, matchbits); - } - return 0; -} - -int -srpc_post_active_rqtbuf(lnet_process_id_t peer, int service, void *buf, - int len, lnet_handle_md_t *mdh, srpc_event_t *ev) -{ - int rc; - int portal; - - if (service > SRPC_FRAMEWORK_SERVICE_MAX_ID) - portal = SRPC_REQUEST_PORTAL; - else - portal = SRPC_FRAMEWORK_REQUEST_PORTAL; - - rc = srpc_post_active_rdma(portal, service, buf, len, - LNET_MD_OP_PUT, peer, - LNET_NID_ANY, mdh, ev); - return rc; -} - -int -srpc_post_passive_rqtbuf(int service, void *buf, int len, - lnet_handle_md_t *mdh, srpc_event_t *ev) -{ - int rc; - int portal; - lnet_process_id_t any = {.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}; - - if (service > SRPC_FRAMEWORK_SERVICE_MAX_ID) - portal = SRPC_REQUEST_PORTAL; - else - portal = SRPC_FRAMEWORK_REQUEST_PORTAL; - - rc = srpc_post_passive_rdma(portal, service, buf, len, - LNET_MD_OP_PUT, any, mdh, ev); - return rc; -} - -int -srpc_service_post_buffer (srpc_service_t *sv, srpc_buffer_t *buf) -{ - srpc_msg_t *msg = &buf->buf_msg; - int rc; - - LASSERT (!sv->sv_shuttingdown); - - buf->buf_mdh = LNET_INVALID_HANDLE; - list_add(&buf->buf_list, &sv->sv_posted_msgq); - sv->sv_nposted_msg++; - spin_unlock(&sv->sv_lock); - - rc = srpc_post_passive_rqtbuf(sv->sv_id, msg, sizeof(*msg), - &buf->buf_mdh, &sv->sv_ev); - - /* 
At this point, a RPC (new or delayed) may have arrived in - * msg and its event handler has been called. So we must add - * buf to sv_posted_msgq _before_ dropping sv_lock */ - - spin_lock(&sv->sv_lock); - - if (rc == 0) { - if (sv->sv_shuttingdown) { - spin_unlock(&sv->sv_lock); - - /* srpc_shutdown_service might have tried to unlink me - * when my buf_mdh was still invalid */ - LNetMDUnlink(buf->buf_mdh); - - spin_lock(&sv->sv_lock); - } - return 0; - } - - sv->sv_nposted_msg--; - if (sv->sv_shuttingdown) return rc; - - list_del(&buf->buf_list); - - spin_unlock(&sv->sv_lock); - LIBCFS_FREE(buf, sizeof(*buf)); - spin_lock(&sv->sv_lock); - return rc; -} - -int -srpc_service_add_buffers (srpc_service_t *sv, int nbuffer) -{ - int rc; - int posted; - srpc_buffer_t *buf; - - LASSERTF (nbuffer > 0, - "nbuffer must be positive: %d\n", nbuffer); - - for (posted = 0; posted < nbuffer; posted++) { - LIBCFS_ALLOC(buf, sizeof(*buf)); - if (buf == NULL) break; - - spin_lock(&sv->sv_lock); - rc = srpc_service_post_buffer(sv, buf); - spin_unlock(&sv->sv_lock); - - if (rc != 0) break; - } - - return posted; -} - -void -srpc_service_remove_buffers (srpc_service_t *sv, int nbuffer) -{ - LASSERTF (nbuffer > 0, - "nbuffer must be positive: %d\n", nbuffer); - - spin_lock(&sv->sv_lock); - - LASSERT (sv->sv_nprune >= 0); - LASSERT (!sv->sv_shuttingdown); - - sv->sv_nprune += nbuffer; - - spin_unlock(&sv->sv_lock); - return; -} - -/* returns 1 if sv has finished, otherwise 0 */ -int -srpc_finish_service (srpc_service_t *sv) -{ - srpc_server_rpc_t *rpc; - srpc_buffer_t *buf; - - spin_lock(&sv->sv_lock); - - LASSERT (sv->sv_shuttingdown); /* srpc_shutdown_service called */ - - if (sv->sv_nposted_msg != 0 || !list_empty(&sv->sv_active_rpcq)) { - CDEBUG (D_NET, - "waiting for %d posted buffers to unlink and " - "in-flight RPCs to die.\n", - sv->sv_nposted_msg); - - if (!list_empty(&sv->sv_active_rpcq)) { - rpc = list_entry(sv->sv_active_rpcq.next, - srpc_server_rpc_t, srpc_list); - CDEBUG 
(D_NETERROR, - "Active RPC on shutdown: sv %s, peer %s, " - "wi %s scheduled %d running %d, " - "ev fired %d type %d status %d lnet %d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.wi_state), - rpc->srpc_wi.wi_scheduled, - rpc->srpc_wi.wi_running, - rpc->srpc_ev.ev_fired, - rpc->srpc_ev.ev_type, - rpc->srpc_ev.ev_status, - rpc->srpc_ev.ev_lnet); - } - - spin_unlock(&sv->sv_lock); - return 0; - } - - spin_unlock(&sv->sv_lock); /* no lock needed from now on */ - - for (;;) { - struct list_head *q; - - if (!list_empty(&sv->sv_posted_msgq)) - q = &sv->sv_posted_msgq; - else if (!list_empty(&sv->sv_blocked_msgq)) - q = &sv->sv_blocked_msgq; - else - break; - - buf = list_entry(q->next, srpc_buffer_t, buf_list); - list_del(&buf->buf_list); - - LIBCFS_FREE(buf, sizeof(*buf)); - } - - while (!list_empty(&sv->sv_free_rpcq)) { - rpc = list_entry(sv->sv_free_rpcq.next, - srpc_server_rpc_t, srpc_list); - list_del(&rpc->srpc_list); - LIBCFS_FREE(rpc, sizeof(*rpc)); - } - - return 1; -} - -/* called with sv->sv_lock held */ -void -srpc_service_recycle_buffer (srpc_service_t *sv, srpc_buffer_t *buf) -{ - if (sv->sv_shuttingdown) goto free; - - if (sv->sv_nprune == 0) { - if (srpc_service_post_buffer(sv, buf) != 0) - CWARN ("Failed to post %s buffer\n", sv->sv_name); - return; - } - - sv->sv_nprune--; -free: - spin_unlock(&sv->sv_lock); - LIBCFS_FREE(buf, sizeof(*buf)); - spin_lock(&sv->sv_lock); -} - -void -srpc_shutdown_service (srpc_service_t *sv) -{ - srpc_server_rpc_t *rpc; - srpc_buffer_t *buf; - - spin_lock(&sv->sv_lock); - - CDEBUG (D_NET, "Shutting down service: id %d, name %s\n", - sv->sv_id, sv->sv_name); - - sv->sv_shuttingdown = 1; /* i.e. 
no new active RPC */ - - /* schedule in-flight RPCs to notice the shutdown */ - list_for_each_entry (rpc, &sv->sv_active_rpcq, srpc_list) { - swi_schedule_workitem(&rpc->srpc_wi); - } - - spin_unlock(&sv->sv_lock); - - /* OK to traverse sv_posted_msgq without lock, since no one - * touches sv_posted_msgq now */ - list_for_each_entry (buf, &sv->sv_posted_msgq, buf_list) - LNetMDUnlink(buf->buf_mdh); - - return; -} - -int -srpc_send_request (srpc_client_rpc_t *rpc) -{ - srpc_event_t *ev = &rpc->crpc_reqstev; - int rc; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REQUEST_SENT; - - rc = srpc_post_active_rqtbuf(rpc->crpc_dest, rpc->crpc_service, - &rpc->crpc_reqstmsg, sizeof(srpc_msg_t), - &rpc->crpc_reqstmdh, ev); - if (rc != 0) { - LASSERT (rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -int -srpc_prepare_reply (srpc_client_rpc_t *rpc) -{ - srpc_event_t *ev = &rpc->crpc_replyev; - __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid; - int rc; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_RCVD; - - *id = srpc_next_id(); - - rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, *id, - &rpc->crpc_replymsg, sizeof(srpc_msg_t), - LNET_MD_OP_PUT, rpc->crpc_dest, - &rpc->crpc_replymdh, ev); - if (rc != 0) { - LASSERT (rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -int -srpc_prepare_bulk (srpc_client_rpc_t *rpc) -{ - srpc_bulk_t *bk = &rpc->crpc_bulk; - srpc_event_t *ev = &rpc->crpc_bulkev; - __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid; - int rc; - int opt; - - LASSERT (bk->bk_niov <= LNET_MAX_IOV); - - if (bk->bk_niov == 0) return 0; /* nothing to do */ - - opt = bk->bk_sink ? 
LNET_MD_OP_PUT : LNET_MD_OP_GET; -#ifdef __KERNEL__ - opt |= LNET_MD_KIOV; -#else - opt |= LNET_MD_IOVEC; -#endif - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_BULK_REQ_RCVD; - - *id = srpc_next_id(); - - rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, *id, - &bk->bk_iovs[0], bk->bk_niov, opt, - rpc->crpc_dest, &bk->bk_mdh, ev); - if (rc != 0) { - LASSERT (rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -int -srpc_do_bulk (srpc_server_rpc_t *rpc) -{ - srpc_event_t *ev = &rpc->srpc_ev; - srpc_bulk_t *bk = rpc->srpc_bulk; - __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid; - int rc; - int opt; - - LASSERT (bk != NULL); - - opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT; -#ifdef __KERNEL__ - opt |= LNET_MD_KIOV; -#else - opt |= LNET_MD_IOVEC; -#endif - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT; - - rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id, - &bk->bk_iovs[0], bk->bk_niov, opt, - rpc->srpc_peer, rpc->srpc_self, - &bk->bk_mdh, ev); - if (rc != 0) - ev->ev_fired = 1; /* no more event expected */ - return rc; -} - -/* called with srpc_service_t::sv_lock held */ -inline void -srpc_schedule_server_rpc (srpc_server_rpc_t *rpc) -{ - srpc_service_t *sv = rpc->srpc_service; - - if (sv->sv_id > SRPC_FRAMEWORK_SERVICE_MAX_ID) - swi_schedule_workitem(&rpc->srpc_wi); - else /* framework RPCs are handled one by one */ - swi_schedule_serial_workitem(&rpc->srpc_wi); - - return; -} - -/* only called from srpc_handle_rpc */ -void -srpc_server_rpc_done (srpc_server_rpc_t *rpc, int status) -{ - srpc_service_t *sv = rpc->srpc_service; - srpc_buffer_t *buffer; - - LASSERT (status != 0 || rpc->srpc_wi.wi_state == SWI_STATE_DONE); - - rpc->srpc_status = status; - - CDEBUG (status == 0 ? 
D_NET : D_NETERROR, - "Server RPC done: service %s, peer %s, status %s:%d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.wi_state), status); - - if (status != 0) { - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_dropped++; - spin_unlock(&srpc_data.rpc_glock); - } - - if (rpc->srpc_done != NULL) - (*rpc->srpc_done) (rpc); - LASSERT (rpc->srpc_bulk == NULL); - - spin_lock(&sv->sv_lock); - - if (rpc->srpc_reqstbuf != NULL) { - /* NB might drop sv_lock in srpc_service_recycle_buffer, but - * sv won't go away for sv_active_rpcq must not be empty */ - srpc_service_recycle_buffer(sv, rpc->srpc_reqstbuf); - rpc->srpc_reqstbuf = NULL; - } - - list_del(&rpc->srpc_list); /* from sv->sv_active_rpcq */ - - /* - * No one can schedule me now since: - * - I'm not on sv_active_rpcq. - * - all LNet events have been fired. - * Cancel pending schedules and prevent future schedule attempts: - */ - LASSERT (rpc->srpc_ev.ev_fired); - swi_kill_workitem(&rpc->srpc_wi); - - if (!sv->sv_shuttingdown && !list_empty(&sv->sv_blocked_msgq)) { - buffer = list_entry(sv->sv_blocked_msgq.next, - srpc_buffer_t, buf_list); - list_del(&buffer->buf_list); - - srpc_init_server_rpc(rpc, sv, buffer); - list_add_tail(&rpc->srpc_list, &sv->sv_active_rpcq); - srpc_schedule_server_rpc(rpc); - } else { - list_add(&rpc->srpc_list, &sv->sv_free_rpcq); - } - - spin_unlock(&sv->sv_lock); - return; -} - -/* handles an incoming RPC */ -int -srpc_handle_rpc (swi_workitem_t *wi) -{ - srpc_server_rpc_t *rpc = wi->wi_data; - srpc_service_t *sv = rpc->srpc_service; - srpc_event_t *ev = &rpc->srpc_ev; - int rc = 0; - - LASSERT (wi == &rpc->srpc_wi); - - spin_lock(&sv->sv_lock); - - if (sv->sv_shuttingdown) { - spin_unlock(&sv->sv_lock); - - if (rpc->srpc_bulk != NULL) - LNetMDUnlink(rpc->srpc_bulk->bk_mdh); - LNetMDUnlink(rpc->srpc_replymdh); - - if (ev->ev_fired) { /* no more event, OK to finish */ - srpc_server_rpc_done(rpc, -ESHUTDOWN); - return 1; - } - return 0; - } - - 
spin_unlock(&sv->sv_lock); - - switch (wi->wi_state) { - default: - LBUG (); - case SWI_STATE_NEWBORN: { - srpc_msg_t *msg; - srpc_generic_reply_t *reply; - - msg = &rpc->srpc_reqstbuf->buf_msg; - reply = &rpc->srpc_replymsg.msg_body.reply; - - if (msg->msg_version != SRPC_MSG_VERSION && - msg->msg_version != __swab32(SRPC_MSG_VERSION)) { - CWARN ("Version mismatch: %u, %u expected, from %s\n", - msg->msg_version, SRPC_MSG_VERSION, - libcfs_id2str(rpc->srpc_peer)); - reply->status = EPROTO; - } else { - reply->status = 0; - rc = (*sv->sv_handler) (rpc); - LASSERT (reply->status == 0 || !rpc->srpc_bulk); - } - - if (rc != 0) { - srpc_server_rpc_done(rpc, rc); - return 1; - } - - wi->wi_state = SWI_STATE_BULK_STARTED; - - if (rpc->srpc_bulk != NULL) { - rc = srpc_do_bulk(rpc); - if (rc == 0) - return 0; /* wait for bulk */ - - LASSERT (ev->ev_fired); - ev->ev_status = rc; - } - } - case SWI_STATE_BULK_STARTED: - LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired); - - if (rpc->srpc_bulk != NULL) { - rc = ev->ev_status; - - if (sv->sv_bulk_ready != NULL) - rc = (*sv->sv_bulk_ready) (rpc, rc); - - if (rc != 0) { - srpc_server_rpc_done(rpc, rc); - return 1; - } - } - - wi->wi_state = SWI_STATE_REPLY_SUBMITTED; - rc = srpc_send_reply(rpc); - if (rc == 0) - return 0; /* wait for reply */ - srpc_server_rpc_done(rpc, rc); - return 1; - - case SWI_STATE_REPLY_SUBMITTED: - LASSERT (ev->ev_fired); - - wi->wi_state = SWI_STATE_DONE; - srpc_server_rpc_done(rpc, ev->ev_status); - return 1; - } - - return 0; -} - -void -srpc_client_rpc_expired (void *data) -{ - srpc_client_rpc_t *rpc = data; - - CWARN ("Client RPC expired: service %d, peer %s, timeout %d.\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - rpc->crpc_timeout); - - spin_lock(&rpc->crpc_lock); - - rpc->crpc_timeout = 0; - srpc_abort_rpc(rpc, -ETIMEDOUT); - - spin_unlock(&rpc->crpc_lock); - - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_expired++; - spin_unlock(&srpc_data.rpc_glock); - return; -} 
- -inline void -srpc_add_client_rpc_timer (srpc_client_rpc_t *rpc) -{ - stt_timer_t *timer = &rpc->crpc_timer; - - if (rpc->crpc_timeout == 0) return; - - CFS_INIT_LIST_HEAD(&timer->stt_list); - timer->stt_data = rpc; - timer->stt_func = srpc_client_rpc_expired; - timer->stt_expires = cfs_time_add(rpc->crpc_timeout, - cfs_time_current_sec()); - stt_add_timer(timer); - return; -} - -/* - * Called with rpc->crpc_lock held. - * - * Upon exit the RPC expiry timer is not queued and the handler is not - * running on any CPU. */ -void -srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc) -{ - /* timer not planted or already exploded */ - if (rpc->crpc_timeout == 0) return; - - /* timer sucessfully defused */ - if (stt_del_timer(&rpc->crpc_timer)) return; - -#ifdef __KERNEL__ - /* timer detonated, wait for it to explode */ - while (rpc->crpc_timeout != 0) { - spin_unlock(&rpc->crpc_lock); - - cfs_schedule(); - - spin_lock(&rpc->crpc_lock); - } -#else - LBUG(); /* impossible in single-threaded runtime */ -#endif - return; -} - -void -srpc_check_sends (srpc_peer_t *peer, int credits) -{ - struct list_head *q; - srpc_client_rpc_t *rpc; - - LASSERT (credits >= 0); - LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING); - - spin_lock(&peer->stp_lock); - peer->stp_credits += credits; - - while (peer->stp_credits) { - if (!list_empty(&peer->stp_ctl_rpcq)) - q = &peer->stp_ctl_rpcq; - else if (!list_empty(&peer->stp_rpcq)) - q = &peer->stp_rpcq; - else - break; - - peer->stp_credits--; - - rpc = list_entry(q->next, srpc_client_rpc_t, crpc_privl); - list_del_init(&rpc->crpc_privl); - srpc_client_rpc_decref(rpc); /* --ref for peer->*rpcq */ - - swi_schedule_workitem(&rpc->crpc_wi); - } - - spin_unlock(&peer->stp_lock); - return; -} - -void -srpc_client_rpc_done (srpc_client_rpc_t *rpc, int status) -{ - swi_workitem_t *wi = &rpc->crpc_wi; - srpc_peer_t *peer = rpc->crpc_peer; - - LASSERT (status != 0 || wi->wi_state == SWI_STATE_DONE); - - spin_lock(&rpc->crpc_lock); - - 
rpc->crpc_closed = 1; - if (rpc->crpc_status == 0) - rpc->crpc_status = status; - - srpc_del_client_rpc_timer(rpc); - - CDEBUG ((status == 0) ? D_NET : D_NETERROR, - "Client RPC done: service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(wi->wi_state), rpc->crpc_aborted, status); - - /* - * No one can schedule me now since: - * - RPC timer has been defused. - * - all LNet events have been fired. - * - crpc_closed has been set, preventing srpc_abort_rpc from - * scheduling me. - * Cancel pending schedules and prevent future schedule attempts: - */ - LASSERT (!srpc_event_pending(rpc)); - swi_kill_workitem(wi); - - spin_unlock(&rpc->crpc_lock); - - (*rpc->crpc_done) (rpc); - - if (peer != NULL) - srpc_check_sends(peer, 1); - return; -} - -/* sends an outgoing RPC */ -int -srpc_send_rpc (swi_workitem_t *wi) -{ - int rc = 0; - srpc_client_rpc_t *rpc = wi->wi_data; - srpc_msg_t *reply = &rpc->crpc_replymsg; - int do_bulk = rpc->crpc_bulk.bk_niov > 0; - - LASSERT (rpc != NULL); - LASSERT (wi == &rpc->crpc_wi); - - spin_lock(&rpc->crpc_lock); - - if (rpc->crpc_aborted) { - spin_unlock(&rpc->crpc_lock); - goto abort; - } - - spin_unlock(&rpc->crpc_lock); - - switch (wi->wi_state) { - default: - LBUG (); - case SWI_STATE_NEWBORN: - LASSERT (!srpc_event_pending(rpc)); - - rc = srpc_prepare_reply(rpc); - if (rc != 0) { - srpc_client_rpc_done(rpc, rc); - return 1; - } - - rc = srpc_prepare_bulk(rpc); - if (rc != 0) break; - - wi->wi_state = SWI_STATE_REQUEST_SUBMITTED; - rc = srpc_send_request(rpc); - break; - - case SWI_STATE_REQUEST_SUBMITTED: - /* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any - * order; however, they're processed in a strict order: - * rqt, rpy, and bulk. 
*/ - if (!rpc->crpc_reqstev.ev_fired) break; - - rc = rpc->crpc_reqstev.ev_status; - if (rc != 0) break; - - wi->wi_state = SWI_STATE_REQUEST_SENT; - /* perhaps more events, fall thru */ - case SWI_STATE_REQUEST_SENT: { - srpc_msg_type_t type = srpc_service2reply(rpc->crpc_service); - - if (!rpc->crpc_replyev.ev_fired) break; - - rc = rpc->crpc_replyev.ev_status; - if (rc != 0) break; - - if ((reply->msg_type != type && - reply->msg_type != __swab32(type)) || - (reply->msg_magic != SRPC_MSG_MAGIC && - reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) { - CWARN ("Bad message from %s: type %u (%d expected)," - " magic %u (%d expected).\n", - libcfs_id2str(rpc->crpc_dest), - reply->msg_type, type, - reply->msg_magic, SRPC_MSG_MAGIC); - rc = -EBADMSG; - break; - } - - if (do_bulk && reply->msg_body.reply.status != 0) { - CWARN ("Remote error %d at %s, unlink bulk buffer in " - "case peer didn't initiate bulk transfer\n", - reply->msg_body.reply.status, - libcfs_id2str(rpc->crpc_dest)); - LNetMDUnlink(rpc->crpc_bulk.bk_mdh); - } - - wi->wi_state = SWI_STATE_REPLY_RECEIVED; - } - case SWI_STATE_REPLY_RECEIVED: - if (do_bulk && !rpc->crpc_bulkev.ev_fired) break; - - rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0; - - /* Bulk buffer was unlinked due to remote error. Clear error - * since reply buffer still contains valid data. - * NB rpc->crpc_done shouldn't look into bulk data in case of - * remote error. 
*/ - if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK && - rpc->crpc_status == 0 && reply->msg_body.reply.status != 0) - rc = 0; - - wi->wi_state = SWI_STATE_DONE; - srpc_client_rpc_done(rpc, rc); - return 1; - } - - if (rc != 0) { - spin_lock(&rpc->crpc_lock); - srpc_abort_rpc(rpc, rc); - spin_unlock(&rpc->crpc_lock); - } - -abort: - if (rpc->crpc_aborted) { - LNetMDUnlink(rpc->crpc_reqstmdh); - LNetMDUnlink(rpc->crpc_replymdh); - LNetMDUnlink(rpc->crpc_bulk.bk_mdh); - - if (!srpc_event_pending(rpc)) { - srpc_client_rpc_done(rpc, -EINTR); - return 1; - } - } - return 0; -} - -srpc_client_rpc_t * -srpc_create_client_rpc (lnet_process_id_t peer, int service, - int nbulkiov, int bulklen, - void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv) -{ - srpc_client_rpc_t *rpc; - - LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t, - crpc_bulk.bk_iovs[nbulkiov])); - if (rpc == NULL) - return NULL; - - srpc_init_client_rpc(rpc, peer, service, nbulkiov, - bulklen, rpc_done, rpc_fini, priv); - return rpc; -} - -/* called with rpc->crpc_lock held */ -static inline void -srpc_queue_rpc (srpc_peer_t *peer, srpc_client_rpc_t *rpc) -{ - int service = rpc->crpc_service; - - LASSERT (peer->stp_nid == rpc->crpc_dest.nid); - LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING); - - rpc->crpc_peer = peer; - - spin_lock(&peer->stp_lock); - - /* Framework RPCs that alter session state shall take precedence - * over test RPCs and framework query RPCs */ - if (service <= SRPC_FRAMEWORK_SERVICE_MAX_ID && - service != SRPC_SERVICE_DEBUG && - service != SRPC_SERVICE_QUERY_STAT) - list_add_tail(&rpc->crpc_privl, &peer->stp_ctl_rpcq); - else - list_add_tail(&rpc->crpc_privl, &peer->stp_rpcq); - - srpc_client_rpc_addref(rpc); /* ++ref for peer->*rpcq */ - spin_unlock(&peer->stp_lock); - return; -} - -/* called with rpc->crpc_lock held */ -void -srpc_abort_rpc (srpc_client_rpc_t *rpc, int why) -{ - srpc_peer_t *peer = rpc->crpc_peer; - - LASSERT (why != 
0); - - if (rpc->crpc_aborted || /* already aborted */ - rpc->crpc_closed) /* callback imminent */ - return; - - CDEBUG (D_NET, - "Aborting RPC: service %d, peer %s, state %s, why %d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.wi_state), why); - - rpc->crpc_aborted = 1; - rpc->crpc_status = why; - - if (peer != NULL) { - spin_lock(&peer->stp_lock); - - if (!list_empty(&rpc->crpc_privl)) { /* still queued */ - list_del_init(&rpc->crpc_privl); - srpc_client_rpc_decref(rpc); /* --ref for peer->*rpcq */ - rpc->crpc_peer = NULL; /* no credit taken */ - } - - spin_unlock(&peer->stp_lock); - } - - swi_schedule_workitem(&rpc->crpc_wi); - return; -} - -/* called with rpc->crpc_lock held */ -void -srpc_post_rpc (srpc_client_rpc_t *rpc) -{ - srpc_peer_t *peer; - - LASSERT (!rpc->crpc_aborted); - LASSERT (rpc->crpc_peer == NULL); - LASSERT (srpc_data.rpc_state == SRPC_STATE_RUNNING); - LASSERT ((rpc->crpc_bulk.bk_len & ~CFS_PAGE_MASK) == 0); - - CDEBUG (D_NET, "Posting RPC: peer %s, service %d, timeout %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_service, - rpc->crpc_timeout); - - srpc_add_client_rpc_timer(rpc); - - peer = srpc_nid2peer(rpc->crpc_dest.nid); - if (peer == NULL) { - srpc_abort_rpc(rpc, -ENOMEM); - return; - } - - srpc_queue_rpc(peer, rpc); - - spin_unlock(&rpc->crpc_lock); - srpc_check_sends(peer, 0); - spin_lock(&rpc->crpc_lock); - return; -} - - -int -srpc_send_reply (srpc_server_rpc_t *rpc) -{ - srpc_event_t *ev = &rpc->srpc_ev; - srpc_msg_t *msg = &rpc->srpc_replymsg; - srpc_buffer_t *buffer = rpc->srpc_reqstbuf; - srpc_service_t *sv = rpc->srpc_service; - __u64 rpyid; - int rc; - - LASSERT (buffer != NULL); - rpyid = buffer->buf_msg.msg_body.reqst.rpyid; - - spin_lock(&sv->sv_lock); - - if (!sv->sv_shuttingdown && - sv->sv_id > SRPC_FRAMEWORK_SERVICE_MAX_ID) { - /* Repost buffer before replying since test client - * might send me another RPC once it gets the reply */ - if (srpc_service_post_buffer(sv, buffer) != 
0) - CWARN ("Failed to repost %s buffer\n", sv->sv_name); - rpc->srpc_reqstbuf = NULL; - } - - spin_unlock(&sv->sv_lock); - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_SENT; - - msg->msg_magic = SRPC_MSG_MAGIC; - msg->msg_version = SRPC_MSG_VERSION; - msg->msg_type = srpc_service2reply(sv->sv_id); - - rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg, - sizeof(*msg), LNET_MD_OP_PUT, - rpc->srpc_peer, rpc->srpc_self, - &rpc->srpc_replymdh, ev); - if (rc != 0) - ev->ev_fired = 1; /* no more event expected */ - return rc; -} - -/* when in kernel always called with LNET_LOCK() held, and in thread context */ -void -srpc_lnet_ev_handler (lnet_event_t *ev) -{ - srpc_event_t *rpcev = ev->md.user_ptr; - srpc_client_rpc_t *crpc; - srpc_server_rpc_t *srpc; - srpc_buffer_t *buffer; - srpc_service_t *sv; - srpc_msg_t *msg; - srpc_msg_type_t type; - - LASSERT (!in_interrupt()); - - if (ev->status != 0) { - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.errors++; - spin_unlock(&srpc_data.rpc_glock); - } - - rpcev->ev_lnet = ev->type; - - switch (rpcev->ev_type) { - default: - LBUG (); - case SRPC_REQUEST_SENT: - if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) { - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_sent++; - spin_unlock(&srpc_data.rpc_glock); - } - case SRPC_REPLY_RCVD: - case SRPC_BULK_REQ_RCVD: - crpc = rpcev->ev_data; - - LASSERT (rpcev == &crpc->crpc_reqstev || - rpcev == &crpc->crpc_replyev || - rpcev == &crpc->crpc_bulkev); - - spin_lock(&crpc->crpc_lock); - - LASSERT (rpcev->ev_fired == 0); - rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? 
- -EINTR : ev->status; - swi_schedule_workitem(&crpc->crpc_wi); - - spin_unlock(&crpc->crpc_lock); - break; - - case SRPC_REQUEST_RCVD: - sv = rpcev->ev_data; - - LASSERT (rpcev == &sv->sv_ev); - - spin_lock(&sv->sv_lock); - - LASSERT (ev->unlinked); - LASSERT (ev->type == LNET_EVENT_PUT || - ev->type == LNET_EVENT_UNLINK); - LASSERT (ev->type != LNET_EVENT_UNLINK || - sv->sv_shuttingdown); - - buffer = container_of(ev->md.start, srpc_buffer_t, buf_msg); - buffer->buf_peer = ev->initiator; - buffer->buf_self = ev->target.nid; - - sv->sv_nposted_msg--; - LASSERT (sv->sv_nposted_msg >= 0); - - if (sv->sv_shuttingdown) { - /* Leave buffer on sv->sv_posted_msgq since - * srpc_finish_service needs to traverse it. */ - spin_unlock(&sv->sv_lock); - break; - } - - list_del(&buffer->buf_list); /* from sv->sv_posted_msgq */ - msg = &buffer->buf_msg; - type = srpc_service2request(sv->sv_id); - - if (ev->status != 0 || ev->mlength != sizeof(*msg) || - (msg->msg_type != type && - msg->msg_type != __swab32(type)) || - (msg->msg_magic != SRPC_MSG_MAGIC && - msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) { - CERROR ("Dropping RPC (%s) from %s: " - "status %d mlength %d type %u magic %u.\n", - sv->sv_name, libcfs_id2str(ev->initiator), - ev->status, ev->mlength, - msg->msg_type, msg->msg_magic); - - /* NB might drop sv_lock in srpc_service_recycle_buffer, - * sv_nposted_msg++ as an implicit reference to prevent - * sv from disappearing under me */ - sv->sv_nposted_msg++; - srpc_service_recycle_buffer(sv, buffer); - sv->sv_nposted_msg--; - spin_unlock(&sv->sv_lock); - - if (ev->status == 0) { /* status!=0 counted already */ - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.errors++; - spin_unlock(&srpc_data.rpc_glock); - } - break; - } - - if (!list_empty(&sv->sv_free_rpcq)) { - srpc = list_entry(sv->sv_free_rpcq.next, - srpc_server_rpc_t, srpc_list); - list_del(&srpc->srpc_list); - - srpc_init_server_rpc(srpc, sv, buffer); - list_add_tail(&srpc->srpc_list, 
&sv->sv_active_rpcq); - srpc_schedule_server_rpc(srpc); - } else { - list_add_tail(&buffer->buf_list, &sv->sv_blocked_msgq); - } - - spin_unlock(&sv->sv_lock); - - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_rcvd++; - spin_unlock(&srpc_data.rpc_glock); - break; - - case SRPC_BULK_GET_RPLD: - LASSERT (ev->type == LNET_EVENT_SEND || - ev->type == LNET_EVENT_REPLY || - ev->type == LNET_EVENT_UNLINK); - - if (ev->type == LNET_EVENT_SEND && - ev->status == 0 && !ev->unlinked) - break; /* wait for the final LNET_EVENT_REPLY */ - - case SRPC_BULK_PUT_SENT: - if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) { - spin_lock(&srpc_data.rpc_glock); - - if (rpcev->ev_type == SRPC_BULK_GET_RPLD) - srpc_data.rpc_counters.bulk_get += ev->mlength; - else - srpc_data.rpc_counters.bulk_put += ev->mlength; - - spin_unlock(&srpc_data.rpc_glock); - } - case SRPC_REPLY_SENT: - srpc = rpcev->ev_data; - sv = srpc->srpc_service; - - LASSERT (rpcev == &srpc->srpc_ev); - - spin_lock(&sv->sv_lock); - rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? - -EINTR : ev->status; - srpc_schedule_server_rpc(srpc); - spin_unlock(&sv->sv_lock); - break; - } - - return; -} - -#ifndef __KERNEL__ - -int -srpc_check_event (int timeout) -{ - lnet_event_t ev; - int rc; - int i; - - rc = LNetEQPoll(&srpc_data.rpc_lnet_eq, 1, - timeout * 1000, &ev, &i); - if (rc == 0) return 0; - - LASSERT (rc == -EOVERFLOW || rc == 1); - - /* We can't affort to miss any events... */ - if (rc == -EOVERFLOW) { - CERROR ("Dropped an event!!!\n"); - abort(); - } - - srpc_lnet_ev_handler(&ev); - return 1; -} - -#endif - -int -srpc_startup (void) -{ - int i; - int rc; - -#ifndef __KERNEL__ - char *s; - - s = getenv("SRPC_PEER_CREDITS"); - srpc_peer_credits = (s != NULL) ? 
atoi(s) : srpc_peer_credits; -#endif - - if (srpc_peer_credits <= 0) { - CERROR("Peer credits must be positive: %d\n", srpc_peer_credits); - return -EINVAL; - } - - memset(&srpc_data, 0, sizeof(struct smoketest_rpc)); - spin_lock_init(&srpc_data.rpc_glock); - - /* 1 second pause to avoid timestamp reuse */ - cfs_pause(cfs_time_seconds(1)); - srpc_data.rpc_matchbits = ((__u64) cfs_time_current_sec()) << 48; - - srpc_data.rpc_state = SRPC_STATE_NONE; - - LIBCFS_ALLOC(srpc_data.rpc_peers, - sizeof(struct list_head) * SRPC_PEER_HASH_SIZE); - if (srpc_data.rpc_peers == NULL) { - CERROR ("Failed to alloc peer hash.\n"); - return -ENOMEM; - } - - for (i = 0; i < SRPC_PEER_HASH_SIZE; i++) - CFS_INIT_LIST_HEAD(&srpc_data.rpc_peers[i]); - -#ifdef __KERNEL__ - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); -#else - if (the_lnet.ln_server_mode_flag) - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); - else - rc = LNetNIInit(getpid() | LNET_PID_USERFLAG); -#endif - if (rc < 0) { - CERROR ("LNetNIInit() has failed: %d\n", rc); - LIBCFS_FREE(srpc_data.rpc_peers, - sizeof(struct list_head) * SRPC_PEER_HASH_SIZE); - return rc; - } - - srpc_data.rpc_state = SRPC_STATE_NI_INIT; - - srpc_data.rpc_lnet_eq = LNET_EQ_NONE; -#ifdef __KERNEL__ - rc = LNetEQAlloc(16, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq); -#else - rc = LNetEQAlloc(10240, LNET_EQ_HANDLER_NONE, &srpc_data.rpc_lnet_eq); -#endif - if (rc != 0) { - CERROR("LNetEQAlloc() has failed: %d\n", rc); - goto bail; - } - - rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); - LASSERT (rc == 0); - - srpc_data.rpc_state = SRPC_STATE_EQ_INIT; - - rc = swi_startup(); - if (rc != 0) - goto bail; - - srpc_data.rpc_state = SRPC_STATE_WI_INIT; - - rc = stt_startup(); - -bail: - if (rc != 0) - srpc_shutdown(); - else - srpc_data.rpc_state = SRPC_STATE_RUNNING; - - return rc; -} - -void -srpc_shutdown (void) -{ - int i; - int rc; - int state; - - state = srpc_data.rpc_state; - srpc_data.rpc_state = SRPC_STATE_STOPPING; - - switch (state) { - default: - 
LBUG (); - case SRPC_STATE_RUNNING: - spin_lock(&srpc_data.rpc_glock); - - for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) { - srpc_service_t *sv = srpc_data.rpc_services[i]; - - LASSERTF (sv == NULL, - "service not empty: id %d, name %s\n", - i, sv->sv_name); - } - - spin_unlock(&srpc_data.rpc_glock); - - stt_shutdown(); - - case SRPC_STATE_WI_INIT: - swi_shutdown(); - - case SRPC_STATE_EQ_INIT: - rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); - LASSERT (rc == 0); - rc = LNetEQFree(srpc_data.rpc_lnet_eq); - LASSERT (rc == 0); /* the EQ should have no user by now */ - - case SRPC_STATE_NI_INIT: - LNetNIFini(); - break; - } - - /* srpc_peer_t's are kept in hash until shutdown */ - for (i = 0; i < SRPC_PEER_HASH_SIZE; i++) { - srpc_peer_t *peer; - - while (!list_empty(&srpc_data.rpc_peers[i])) { - peer = list_entry(srpc_data.rpc_peers[i].next, - srpc_peer_t, stp_list); - list_del(&peer->stp_list); - - LASSERT (list_empty(&peer->stp_rpcq)); - LASSERT (list_empty(&peer->stp_ctl_rpcq)); - LASSERT (peer->stp_credits == srpc_peer_credits); - - LIBCFS_FREE(peer, sizeof(srpc_peer_t)); - } - } - - LIBCFS_FREE(srpc_data.rpc_peers, - sizeof(struct list_head) * SRPC_PEER_HASH_SIZE); - return; -} diff --git a/lnet/selftest/rpc.h b/lnet/selftest/rpc.h deleted file mode 100644 index 5cd7b32de318b38745f05f72ecd38120e6931aea..0000000000000000000000000000000000000000 --- a/lnet/selftest/rpc.h +++ /dev/null @@ -1,235 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __SELFTEST_RPC_H__ -#define __SELFTEST_RPC_H__ - -#include <lnet/lnetst.h> - -/* - * LST wired structures - * - * XXX: *REPLY == *REQST + 1 - */ -typedef enum { - SRPC_MSG_MKSN_REQST = 0, - SRPC_MSG_MKSN_REPLY = 1, - SRPC_MSG_RMSN_REQST = 2, - SRPC_MSG_RMSN_REPLY = 3, - SRPC_MSG_BATCH_REQST = 4, - SRPC_MSG_BATCH_REPLY = 5, - SRPC_MSG_STAT_REQST = 6, - SRPC_MSG_STAT_REPLY = 7, - SRPC_MSG_TEST_REQST = 8, - SRPC_MSG_TEST_REPLY = 9, - 
SRPC_MSG_DEBUG_REQST = 10, - SRPC_MSG_DEBUG_REPLY = 11, - SRPC_MSG_BRW_REQST = 12, - SRPC_MSG_BRW_REPLY = 13, - SRPC_MSG_PING_REQST = 14, - SRPC_MSG_PING_REPLY = 15, - SRPC_MSG_JOIN_REQST = 16, - SRPC_MSG_JOIN_REPLY = 17, -} srpc_msg_type_t; - -/* CAVEAT EMPTOR: - * All srpc_*_reqst_t's 1st field must be matchbits of reply buffer, - * and 2nd field matchbits of bulk buffer if any. - * - * All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field - * session id if needed. - */ -typedef struct { - __u64 rpyid; /* reply buffer matchbits */ - __u64 bulkid; /* bulk buffer matchbits */ -} WIRE_ATTR srpc_generic_reqst_t; - -typedef struct { - __u32 status; - lst_sid_t sid; -} WIRE_ATTR srpc_generic_reply_t; - -/* FRAMEWORK RPCs */ -typedef struct { - __u64 mksn_rpyid; /* reply buffer matchbits */ - lst_sid_t mksn_sid; /* session id */ - __u32 mksn_force; /* use brute force */ - char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR srpc_mksn_reqst_t; /* make session request */ - -typedef struct { - __u32 mksn_status; /* session status */ - lst_sid_t mksn_sid; /* session id */ - __u32 mksn_timeout; /* session timeout */ - char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR srpc_mksn_reply_t; /* make session reply */ - -typedef struct { - __u64 rmsn_rpyid; /* reply buffer matchbits */ - lst_sid_t rmsn_sid; /* session id */ -} WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */ - -typedef struct { - __u32 rmsn_status; - lst_sid_t rmsn_sid; /* session id */ -} WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */ - -typedef struct { - __u64 join_rpyid; /* reply buffer matchbits */ - lst_sid_t join_sid; /* session id to join */ - char join_group[LST_NAME_SIZE]; /* group name */ -} WIRE_ATTR srpc_join_reqst_t; - -typedef struct { - __u32 join_status; /* returned status */ - lst_sid_t join_sid; /* session id */ - __u32 join_timeout; /* # seconds' inactivity to expire */ - char join_session[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR srpc_join_reply_t; - -typedef struct { - 
__u64 dbg_rpyid; /* reply buffer matchbits */ - lst_sid_t dbg_sid; /* session id */ - __u32 dbg_flags; /* bitmap of debug */ -} WIRE_ATTR srpc_debug_reqst_t; - -typedef struct { - __u32 dbg_status; /* returned code */ - lst_sid_t dbg_sid; /* session id */ - __u32 dbg_timeout; /* session timeout */ - __u32 dbg_nbatch; /* # of batches in the node */ - char dbg_name[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR srpc_debug_reply_t; - -#define SRPC_BATCH_OPC_RUN 1 -#define SRPC_BATCH_OPC_STOP 2 -#define SRPC_BATCH_OPC_QUERY 3 - -typedef struct { - __u64 bar_rpyid; /* reply buffer matchbits */ - lst_sid_t bar_sid; /* session id */ - lst_bid_t bar_bid; /* batch id */ - __u32 bar_opc; /* create/start/stop batch */ - __u32 bar_testidx; /* index of test */ - __u32 bar_arg; /* parameters */ -} WIRE_ATTR srpc_batch_reqst_t; - -typedef struct { - __u32 bar_status; /* status of request */ - lst_sid_t bar_sid; /* session id */ - __u32 bar_active; /* # of active tests in batch/test */ - __u32 bar_time; /* remained time */ -} WIRE_ATTR srpc_batch_reply_t; - -typedef struct { - __u64 str_rpyid; /* reply buffer matchbits */ - lst_sid_t str_sid; /* session id */ - __u32 str_type; /* type of stat */ -} WIRE_ATTR srpc_stat_reqst_t; - -typedef struct { - __u32 str_status; - lst_sid_t str_sid; - sfw_counters_t str_fw; - srpc_counters_t str_rpc; - lnet_counters_t str_lnet; -} WIRE_ATTR srpc_stat_reply_t; - -typedef struct { - __u32 blk_opc; /* bulk operation code */ - __u32 blk_npg; /* # of pages */ - __u32 blk_flags; /* reserved flags */ -} WIRE_ATTR test_bulk_req_t; - -typedef struct { - __u32 png_size; /* size of ping message */ - __u32 png_flags; /* reserved flags */ -} WIRE_ATTR test_ping_req_t; - -typedef struct { - __u64 tsr_rpyid; /* reply buffer matchbits */ - __u64 tsr_bulkid; /* bulk buffer matchbits */ - lst_sid_t tsr_sid; /* session id */ - lst_bid_t tsr_bid; /* batch id */ - __u32 tsr_service; /* test type: bulk|ping|... 
*/ - /* test client loop count or # server buffers needed */ - __u32 tsr_loop; - __u32 tsr_concur; /* concurrency of test */ - __u8 tsr_is_client; /* is test client or not */ - __u8 tsr_stop_onerr; /* stop on error */ - __u32 tsr_ndest; /* # of dest nodes */ - - union { - test_bulk_req_t bulk; - test_ping_req_t ping; - } tsr_u; -} WIRE_ATTR srpc_test_reqst_t; - -typedef struct { - __u32 tsr_status; /* returned code */ - lst_sid_t tsr_sid; -} WIRE_ATTR srpc_test_reply_t; - -/* TEST RPCs */ -typedef struct { - __u64 pnr_rpyid; - __u32 pnr_magic; - __u32 pnr_seq; - __u64 pnr_time_sec; - __u64 pnr_time_usec; -} WIRE_ATTR srpc_ping_reqst_t; - -typedef struct { - __u32 pnr_status; - __u32 pnr_magic; - __u32 pnr_seq; -} WIRE_ATTR srpc_ping_reply_t; - -typedef struct { - __u64 brw_rpyid; /* reply buffer matchbits */ - __u64 brw_bulkid; /* bulk buffer matchbits */ - __u32 brw_rw; /* read or write */ - __u32 brw_len; /* bulk data len */ - __u32 brw_flags; /* bulk data patterns */ -} WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */ - -typedef struct { - __u32 brw_status; -} WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */ - -#define SRPC_MSG_MAGIC 0xeeb0f00d -#define SRPC_MSG_VERSION 1 -typedef struct { - __u32 msg_magic; /* magic */ - __u32 msg_version; /* # version */ - __u32 msg_type; /* what's in msg_body? 
srpc_msg_type_t */ - __u32 msg_reserved0; /* reserved seats */ - __u32 msg_reserved1; - __u32 msg_reserved2; - union { - srpc_generic_reqst_t reqst; - srpc_generic_reply_t reply; - - srpc_mksn_reqst_t mksn_reqst; - srpc_mksn_reply_t mksn_reply; - srpc_rmsn_reqst_t rmsn_reqst; - srpc_rmsn_reply_t rmsn_reply; - srpc_debug_reqst_t dbg_reqst; - srpc_debug_reply_t dbg_reply; - srpc_batch_reqst_t bat_reqst; - srpc_batch_reply_t bat_reply; - srpc_stat_reqst_t stat_reqst; - srpc_stat_reply_t stat_reply; - srpc_test_reqst_t tes_reqst; - srpc_test_reply_t tes_reply; - srpc_join_reqst_t join_reqst; - srpc_join_reply_t join_reply; - - srpc_ping_reqst_t ping_reqst; - srpc_ping_reply_t ping_reply; - srpc_brw_reqst_t brw_reqst; - srpc_brw_reply_t brw_reply; - } msg_body; -} WIRE_ATTR srpc_msg_t; - -#endif /* __SELFTEST_RPC_H__ */ diff --git a/lnet/selftest/selftest.h b/lnet/selftest/selftest.h deleted file mode 100644 index 50383505bb76a0530a9f68755d927434ce4895ba..0000000000000000000000000000000000000000 --- a/lnet/selftest/selftest.h +++ /dev/null @@ -1,592 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Isaac Huang <isaac@clusterfs.com> - * - */ -#ifndef __SELFTEST_SELFTEST_H__ -#define __SELFTEST_SELFTEST_H__ - -#define LNET_ONLY - -#ifndef __KERNEL__ - -/* XXX workaround XXX */ -#ifdef HAVE_SYS_TYPES_H -#include <sys/types.h> -#endif - -/* TODO: remove these when libcfs provides proper primitives for userspace - * - * Dummy implementations of spinlock_t and atomic_t work since userspace - * selftest is completely single-threaded, even using multi-threaded usocklnd. 
- */ -typedef struct { } spinlock_t; -static inline void spin_lock(spinlock_t *l) {return;} -static inline void spin_unlock(spinlock_t *l) {return;} -static inline void spin_lock_init(spinlock_t *l) {return;} - -typedef struct { volatile int counter; } atomic_t; -#define atomic_read(a) ((a)->counter) -#define atomic_set(a,b) do {(a)->counter = b; } while (0) -#define atomic_dec_and_test(a) ((--((a)->counter)) == 0) -#define atomic_inc(a) (((a)->counter)++) -#define atomic_dec(a) do { (a)->counter--; } while (0) - -#endif - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> -#include <lnet/lib-types.h> -#include <lnet/lnetst.h> - -#include "rpc.h" -#include "timer.h" - -#ifndef MADE_WITHOUT_COMPROMISE -#define MADE_WITHOUT_COMPROMISE -#endif - - -#define SWI_STATE_NEWBORN 0 -#define SWI_STATE_REPLY_SUBMITTED 1 -#define SWI_STATE_REPLY_SENT 2 -#define SWI_STATE_REQUEST_SUBMITTED 3 -#define SWI_STATE_REQUEST_SENT 4 -#define SWI_STATE_REPLY_RECEIVED 5 -#define SWI_STATE_BULK_STARTED 6 -#define SWI_STATE_DONE 10 - -/* forward refs */ -struct swi_workitem; -struct srpc_service; -struct sfw_test_unit; -struct sfw_test_instance; - -/* - * A workitems is deferred work with these semantics: - * - a workitem always runs in thread context. - * - a workitem can be concurrent with other workitems but is strictly - * serialized with respect to itself. - * - no CPU affinity, a workitem does not necessarily run on the same CPU - * that schedules it. However, this might change in the future. - * - if a workitem is scheduled again before it has a chance to run, it - * runs only once. - * - if a workitem is scheduled while it runs, it runs again after it - * completes; this ensures that events occurring while other events are - * being processed receive due attention. This behavior also allows a - * workitem to reschedule itself. 
- * - * Usage notes: - * - a workitem can sleep but it should be aware of how that sleep might - * affect others. - * - a workitem runs inside a kernel thread so there's no user space to access. - * - do not use a workitem if the scheduling latency can't be tolerated. - * - * When wi_action returns non-zero, it means the workitem has either been - * freed or reused and workitem scheduler won't touch it any more. - */ -typedef int (*swi_action_t) (struct swi_workitem *); -typedef struct swi_workitem { - struct list_head wi_list; /* chain on runq */ - int wi_state; - swi_action_t wi_action; - void *wi_data; - unsigned int wi_running:1; - unsigned int wi_scheduled:1; -} swi_workitem_t; - -static inline void -swi_init_workitem (swi_workitem_t *wi, void *data, swi_action_t action) -{ - CFS_INIT_LIST_HEAD(&wi->wi_list); - - wi->wi_running = 0; - wi->wi_scheduled = 0; - wi->wi_data = data; - wi->wi_action = action; - wi->wi_state = SWI_STATE_NEWBORN; -} - -#define SWI_RESCHED 128 /* # workitem scheduler loops before reschedule */ - -/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework - * services, e.g. create/modify session. 
- */ -#define SRPC_SERVICE_DEBUG 0 -#define SRPC_SERVICE_MAKE_SESSION 1 -#define SRPC_SERVICE_REMOVE_SESSION 2 -#define SRPC_SERVICE_BATCH 3 -#define SRPC_SERVICE_TEST 4 -#define SRPC_SERVICE_QUERY_STAT 5 -#define SRPC_SERVICE_JOIN 6 -#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10 -/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */ -#define SRPC_SERVICE_BRW 11 -#define SRPC_SERVICE_PING 12 -#define SRPC_SERVICE_MAX_ID 12 - -#define SRPC_REQUEST_PORTAL 50 -/* a lazy portal for framework RPC requests */ -#define SRPC_FRAMEWORK_REQUEST_PORTAL 51 -/* all reply/bulk RDMAs go to this portal */ -#define SRPC_RDMA_PORTAL 52 - -static inline srpc_msg_type_t -srpc_service2request (int service) -{ - switch (service) { - default: - LBUG (); - case SRPC_SERVICE_DEBUG: - return SRPC_MSG_DEBUG_REQST; - - case SRPC_SERVICE_MAKE_SESSION: - return SRPC_MSG_MKSN_REQST; - - case SRPC_SERVICE_REMOVE_SESSION: - return SRPC_MSG_RMSN_REQST; - - case SRPC_SERVICE_BATCH: - return SRPC_MSG_BATCH_REQST; - - case SRPC_SERVICE_TEST: - return SRPC_MSG_TEST_REQST; - - case SRPC_SERVICE_QUERY_STAT: - return SRPC_MSG_STAT_REQST; - - case SRPC_SERVICE_BRW: - return SRPC_MSG_BRW_REQST; - - case SRPC_SERVICE_PING: - return SRPC_MSG_PING_REQST; - - case SRPC_SERVICE_JOIN: - return SRPC_MSG_JOIN_REQST; - } -} - -static inline srpc_msg_type_t -srpc_service2reply (int service) -{ - return srpc_service2request(service) + 1; -} - -typedef enum { - SRPC_BULK_REQ_RCVD = 0, /* passive bulk request(PUT sink/GET source) received */ - SRPC_BULK_PUT_SENT = 1, /* active bulk PUT sent (source) */ - SRPC_BULK_GET_RPLD = 2, /* active bulk GET replied (sink) */ - SRPC_REPLY_RCVD = 3, /* incoming reply received */ - SRPC_REPLY_SENT = 4, /* outgoing reply sent */ - SRPC_REQUEST_RCVD = 5, /* incoming request received */ - SRPC_REQUEST_SENT = 6, /* outgoing request sent */ -} srpc_event_type_t; - -/* RPC event */ -typedef struct { - srpc_event_type_t ev_type; /* what's up */ - lnet_event_kind_t ev_lnet; /* LNet 
event type */ - int ev_fired; /* LNet event fired? */ - int ev_status; /* LNet event status */ - void *ev_data; /* owning server/client RPC */ -} srpc_event_t; - -typedef struct { - int bk_len; /* len of bulk data */ - lnet_handle_md_t bk_mdh; - int bk_sink; /* sink/source */ - int bk_niov; /* # iov in bk_iovs */ -#ifdef __KERNEL__ - lnet_kiov_t bk_iovs[0]; -#else - cfs_page_t **bk_pages; - lnet_md_iovec_t bk_iovs[0]; -#endif -} srpc_bulk_t; /* bulk descriptor */ - -typedef struct srpc_peer { - struct list_head stp_list; /* chain on peer hash */ - struct list_head stp_rpcq; /* q of non-control RPCs */ - struct list_head stp_ctl_rpcq; /* q of control RPCs */ - spinlock_t stp_lock; /* serialize */ - lnet_nid_t stp_nid; - int stp_credits; /* available credits */ -} srpc_peer_t; - -/* message buffer descriptor */ -typedef struct { - struct list_head buf_list; /* chain on srpc_service::*_msgq */ - srpc_msg_t buf_msg; - lnet_handle_md_t buf_mdh; - lnet_nid_t buf_self; - lnet_process_id_t buf_peer; -} srpc_buffer_t; - -/* server-side state of a RPC */ -typedef struct srpc_server_rpc { - struct list_head srpc_list; /* chain on srpc_service::*_rpcq */ - struct srpc_service *srpc_service; - swi_workitem_t srpc_wi; - srpc_event_t srpc_ev; /* bulk/reply event */ - lnet_nid_t srpc_self; - lnet_process_id_t srpc_peer; - srpc_msg_t srpc_replymsg; - lnet_handle_md_t srpc_replymdh; - srpc_buffer_t *srpc_reqstbuf; - srpc_bulk_t *srpc_bulk; - - int srpc_status; - void (*srpc_done)(struct srpc_server_rpc *); -} srpc_server_rpc_t; - -/* client-side state of a RPC */ -typedef struct srpc_client_rpc { - struct list_head crpc_list; /* chain on user's lists */ - struct list_head crpc_privl; /* chain on srpc_peer_t::*rpcq */ - spinlock_t crpc_lock; /* serialize */ - int crpc_service; - atomic_t crpc_refcount; - int crpc_timeout; /* # seconds to wait for reply */ - stt_timer_t crpc_timer; - swi_workitem_t crpc_wi; - lnet_process_id_t crpc_dest; - srpc_peer_t *crpc_peer; - - void 
(*crpc_done)(struct srpc_client_rpc *); - void (*crpc_fini)(struct srpc_client_rpc *); - int crpc_status; /* completion status */ - void *crpc_priv; /* caller data */ - - /* state flags */ - unsigned int crpc_aborted:1; /* being given up */ - unsigned int crpc_closed:1; /* completed */ - - /* RPC events */ - srpc_event_t crpc_bulkev; /* bulk event */ - srpc_event_t crpc_reqstev; /* request event */ - srpc_event_t crpc_replyev; /* reply event */ - - /* bulk, request(reqst), and reply exchanged on wire */ - srpc_msg_t crpc_reqstmsg; - srpc_msg_t crpc_replymsg; - lnet_handle_md_t crpc_reqstmdh; - lnet_handle_md_t crpc_replymdh; - srpc_bulk_t crpc_bulk; -} srpc_client_rpc_t; - -#define srpc_client_rpc_size(rpc) \ -offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov]) - -#define srpc_client_rpc_addref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - atomic_inc(&(rpc)->crpc_refcount); \ -} while (0) - -#define srpc_client_rpc_decref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \ - srpc_destroy_client_rpc(rpc); \ -} while (0) - -#define srpc_event_pending(rpc) ((rpc)->crpc_bulkev.ev_fired == 0 || \ - (rpc)->crpc_reqstev.ev_fired == 0 || \ - (rpc)->crpc_replyev.ev_fired == 0) - -typedef struct srpc_service { - int sv_id; /* service id */ - const char *sv_name; /* human readable name */ - int sv_nprune; /* # posted RPC to be pruned */ - int sv_concur; /* max # concurrent RPCs */ - - spinlock_t sv_lock; - int sv_shuttingdown; - srpc_event_t sv_ev; /* LNet event */ - int sv_nposted_msg; /* # posted message buffers */ - struct list_head sv_free_rpcq; /* free RPC descriptors */ - struct list_head sv_active_rpcq; 
/* in-flight RPCs */ - struct list_head sv_posted_msgq; /* posted message buffers */ - struct list_head sv_blocked_msgq; /* blocked for RPC descriptor */ - - /* Service callbacks: - * - sv_handler: process incoming RPC request - * - sv_bulk_ready: notify bulk data - */ - int (*sv_handler) (srpc_server_rpc_t *); - int (*sv_bulk_ready) (srpc_server_rpc_t *, int); -} srpc_service_t; - -#define SFW_POST_BUFFERS 8 -#define SFW_SERVICE_CONCURRENCY (SFW_POST_BUFFERS/2) - -typedef struct { - struct list_head sn_list; /* chain on fw_zombie_sessions */ - lst_sid_t sn_id; /* unique identifier */ - unsigned int sn_timeout; /* # seconds' inactivity to expire */ - int sn_timer_active; - stt_timer_t sn_timer; - struct list_head sn_batches; /* list of batches */ - char sn_name[LST_NAME_SIZE]; - atomic_t sn_brw_errors; - atomic_t sn_ping_errors; -} sfw_session_t; - -#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \ - (sid0).ses_stamp == (sid1).ses_stamp) - -typedef struct { - struct list_head bat_list; /* chain on sn_batches */ - lst_bid_t bat_id; /* batch id */ - int bat_error; /* error code of batch */ - sfw_session_t *bat_session; /* batch's session */ - atomic_t bat_nactive; /* # of active tests */ - struct list_head bat_tests; /* test instances */ -} sfw_batch_t; - -typedef struct { - int (*tso_init)(struct sfw_test_instance *tsi); /* intialize test client */ - void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */ - int (*tso_prep_rpc)(struct sfw_test_unit *tsu, - lnet_process_id_t dest, - srpc_client_rpc_t **rpc); /* prep a tests rpc */ - void (*tso_done_rpc)(struct sfw_test_unit *tsu, - srpc_client_rpc_t *rpc); /* done a test rpc */ -} sfw_test_client_ops_t; - -typedef struct sfw_test_instance { - struct list_head tsi_list; /* chain on batch */ - int tsi_service; /* test type */ - sfw_batch_t *tsi_batch; /* batch */ - sfw_test_client_ops_t *tsi_ops; /* test client operations */ - - /* public parameter for all test units */ - int 
tsi_is_client:1; /* is test client */ - int tsi_stoptsu_onerr:1; /* stop tsu on error */ - int tsi_concur; /* concurrency */ - int tsi_loop; /* loop count */ - - /* status of test instance */ - spinlock_t tsi_lock; /* serialize */ - int tsi_stopping:1; /* test is stopping */ - atomic_t tsi_nactive; /* # of active test unit */ - struct list_head tsi_units; /* test units */ - struct list_head tsi_free_rpcs; /* free rpcs */ - struct list_head tsi_active_rpcs; /* active rpcs */ - - union { - test_bulk_req_t bulk; /* bulk parameter */ - test_ping_req_t ping; /* ping parameter */ - } tsi_u; -} sfw_test_instance_t; - -/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at - * the end of pages are not used */ -#define SFW_MAX_CONCUR LST_MAX_CONCUR -#define SFW_ID_PER_PAGE (CFS_PAGE_SIZE / sizeof(lnet_process_id_t)) -#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) -#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE) - -typedef struct sfw_test_unit { - struct list_head tsu_list; /* chain on lst_test_instance */ - lnet_process_id_t tsu_dest; /* id of dest node */ - int tsu_loop; /* loop count of the test */ - sfw_test_instance_t *tsu_instance; /* pointer to test instance */ - void *tsu_private; /* private data */ - swi_workitem_t tsu_worker; /* workitem of the test unit */ -} sfw_test_unit_t; - -typedef struct { - struct list_head tsc_list; /* chain on fw_tests */ - srpc_service_t *tsc_srv_service; /* test service */ - sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */ -} sfw_test_case_t; - - -srpc_client_rpc_t * -sfw_create_rpc(lnet_process_id_t peer, int service, int nbulkiov, int bulklen, - void (*done) (srpc_client_rpc_t *), void *priv); -int sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer, - int nblk, int blklen, srpc_client_rpc_t **rpc); -void sfw_abort_rpc(srpc_client_rpc_t *rpc); -void sfw_post_rpc(srpc_client_rpc_t *rpc); -void sfw_client_rpc_done(srpc_client_rpc_t *rpc); -void 
sfw_unpack_message(srpc_msg_t *msg); -void sfw_free_pages(srpc_server_rpc_t *rpc); -void sfw_add_bulk_page(srpc_bulk_t *bk, cfs_page_t *pg, int i); -int sfw_alloc_pages(srpc_server_rpc_t *rpc, int npages, int sink); - -srpc_client_rpc_t * -srpc_create_client_rpc(lnet_process_id_t peer, int service, - int nbulkiov, int bulklen, - void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv); -void srpc_post_rpc(srpc_client_rpc_t *rpc); -void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why); -void srpc_free_bulk(srpc_bulk_t *bk); -srpc_bulk_t *srpc_alloc_bulk(int npages, int sink); -int srpc_send_rpc(swi_workitem_t *wi); -int srpc_send_reply(srpc_server_rpc_t *rpc); -int srpc_add_service(srpc_service_t *sv); -int srpc_remove_service(srpc_service_t *sv); -void srpc_shutdown_service(srpc_service_t *sv); -int srpc_finish_service(srpc_service_t *sv); -int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer); -void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer); -void srpc_get_counters(srpc_counters_t *cnt); -void srpc_set_counters(const srpc_counters_t *cnt); - -void swi_kill_workitem(swi_workitem_t *wi); -void swi_schedule_workitem(swi_workitem_t *wi); -void swi_schedule_serial_workitem(swi_workitem_t *wi); -int swi_startup(void); -int sfw_startup(void); -int srpc_startup(void); -void swi_shutdown(void); -void sfw_shutdown(void); -void srpc_shutdown(void); - -static inline void -srpc_destroy_client_rpc (srpc_client_rpc_t *rpc) -{ - LASSERT (rpc != NULL); - LASSERT (!srpc_event_pending(rpc)); - LASSERT (list_empty(&rpc->crpc_privl)); - LASSERT (atomic_read(&rpc->crpc_refcount) == 0); -#ifndef __KERNEL__ - LASSERT (rpc->crpc_bulk.bk_pages == NULL); -#endif - - if (rpc->crpc_fini == NULL) { - LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); - } else { - (*rpc->crpc_fini) (rpc); - } - - return; -} - -static inline void -srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer, - int service, int nbulkiov, int bulklen, 
- void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv) -{ - LASSERT (nbulkiov <= LNET_MAX_IOV); - - memset(rpc, 0, offsetof(srpc_client_rpc_t, - crpc_bulk.bk_iovs[nbulkiov])); - - CFS_INIT_LIST_HEAD(&rpc->crpc_list); - CFS_INIT_LIST_HEAD(&rpc->crpc_privl); - swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc); - spin_lock_init(&rpc->crpc_lock); - atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */ - - rpc->crpc_dest = peer; - rpc->crpc_priv = priv; - rpc->crpc_service = service; - rpc->crpc_bulk.bk_len = bulklen; - rpc->crpc_bulk.bk_niov = nbulkiov; - rpc->crpc_done = rpc_done; - rpc->crpc_fini = rpc_fini; - rpc->crpc_reqstmdh = - rpc->crpc_replymdh = - rpc->crpc_bulk.bk_mdh = LNET_INVALID_HANDLE; - - /* no event is expected at this point */ - rpc->crpc_bulkev.ev_fired = - rpc->crpc_reqstev.ev_fired = - rpc->crpc_replyev.ev_fired = 1; - - rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC; - rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION; - rpc->crpc_reqstmsg.msg_type = srpc_service2request(service); - return; -} - -static inline const char * -swi_state2str (int state) -{ -#define STATE2STR(x) case x: return #x - switch(state) { - default: - LBUG(); - STATE2STR(SWI_STATE_NEWBORN); - STATE2STR(SWI_STATE_REPLY_SUBMITTED); - STATE2STR(SWI_STATE_REPLY_SENT); - STATE2STR(SWI_STATE_REQUEST_SUBMITTED); - STATE2STR(SWI_STATE_REQUEST_SENT); - STATE2STR(SWI_STATE_REPLY_RECEIVED); - STATE2STR(SWI_STATE_BULK_STARTED); - STATE2STR(SWI_STATE_DONE); - } -#undef STATE2STR -} - -#define UNUSED(x) ( (void)(x) ) - -#ifndef __KERNEL__ - -int stt_poll_interval(void); -int sfw_session_removed(void); - -int stt_check_events(void); -int swi_check_events(void); -int srpc_check_event(int timeout); - -int lnet_selftest_init(void); -void lnet_selftest_fini(void); -int selftest_wait_events(void); - -#else - -#define selftest_wait_events() cfs_pause(cfs_time_seconds(1)) - -#endif - -#define lst_wait_until(cond, lock, fmt, a...) 
\ -do { \ - int __I = 2; \ - while (!(cond)) { \ - __I++; \ - CDEBUG(((__I & (-__I)) == __I) ? D_WARNING : \ - D_NET, /* 2**n? */ \ - fmt, ## a); \ - spin_unlock(&(lock)); \ - \ - selftest_wait_events(); \ - \ - spin_lock(&(lock)); \ - } \ -} while (0) - -static inline void -srpc_wait_service_shutdown (srpc_service_t *sv) -{ - int i = 2; - - spin_lock(&sv->sv_lock); - LASSERT (sv->sv_shuttingdown); - spin_unlock(&sv->sv_lock); - - while (srpc_finish_service(sv) == 0) { - i++; - CDEBUG (((i & -i) == i) ? D_WARNING : D_NET, - "Waiting for %s service to shutdown...\n", - sv->sv_name); - selftest_wait_events(); - } -} - -#endif /* __SELFTEST_SELFTEST_H__ */ diff --git a/lnet/selftest/timer.c b/lnet/selftest/timer.c deleted file mode 100644 index e847e482f44f5e65b1423232c3dc897f2c1dc451..0000000000000000000000000000000000000000 --- a/lnet/selftest/timer.c +++ /dev/null @@ -1,246 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Isaac Huang <isaac@clusterfs.com> - * - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - - -/* - * Timers are implemented as a sorted queue of expiry times. The queue - * is slotted, with each slot holding timers which expire in a - * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are - * sorted by increasing expiry time. The number of slots is 2**7 (128), - * to cover a time period of 1024 seconds into the future before wrapping. 
- */ -#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */ -#define STTIMER_SLOTTIME (1 << STTIMER_MINPOLL) -#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1)) -#define STTIMER_NSLOTS (1 << 7) -#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \ - (STTIMER_NSLOTS - 1))]) - -struct st_timer_data { - spinlock_t stt_lock; - /* start time of the slot processed previously */ - cfs_time_t stt_prev_slot; - struct list_head stt_hash[STTIMER_NSLOTS]; - int stt_shuttingdown; -#ifdef __KERNEL__ - cfs_waitq_t stt_waitq; - int stt_nthreads; -#endif -} stt_data; - -void -stt_add_timer (stt_timer_t *timer) -{ - struct list_head *pos; - - spin_lock(&stt_data.stt_lock); - -#ifdef __KERNEL__ - LASSERT (stt_data.stt_nthreads > 0); -#endif - LASSERT (!stt_data.stt_shuttingdown); - LASSERT (timer->stt_func != NULL); - LASSERT (list_empty(&timer->stt_list)); - LASSERT (cfs_time_after(timer->stt_expires, cfs_time_current_sec())); - - /* a simple insertion sort */ - list_for_each_prev (pos, STTIMER_SLOT(timer->stt_expires)) { - stt_timer_t *old = list_entry(pos, stt_timer_t, stt_list); - - if (cfs_time_aftereq(timer->stt_expires, old->stt_expires)) - break; - } - list_add(&timer->stt_list, pos); - - spin_unlock(&stt_data.stt_lock); -} - -/* - * The function returns whether it has deactivated a pending timer or not. - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an - * active timer returns 1.) - * - * CAVEAT EMPTOR: - * When 0 is returned, it is possible that timer->stt_func _is_ running on - * another CPU. 
- */ -int -stt_del_timer (stt_timer_t *timer) -{ - int ret = 0; - - spin_lock(&stt_data.stt_lock); - -#ifdef __KERNEL__ - LASSERT (stt_data.stt_nthreads > 0); -#endif - LASSERT (!stt_data.stt_shuttingdown); - - if (!list_empty(&timer->stt_list)) { - ret = 1; - list_del_init(&timer->stt_list); - } - - spin_unlock(&stt_data.stt_lock); - return ret; -} - -/* called with stt_data.stt_lock held */ -int -stt_expire_list (struct list_head *slot, cfs_time_t now) -{ - int expired = 0; - stt_timer_t *timer; - - while (!list_empty(slot)) { - timer = list_entry(slot->next, stt_timer_t, stt_list); - - if (cfs_time_after(timer->stt_expires, now)) - break; - - list_del_init(&timer->stt_list); - spin_unlock(&stt_data.stt_lock); - - expired++; - (*timer->stt_func) (timer->stt_data); - - spin_lock(&stt_data.stt_lock); - } - - return expired; -} - -int -stt_check_timers (cfs_time_t *last) -{ - int expired = 0; - cfs_time_t now; - cfs_time_t this_slot; - - now = cfs_time_current_sec(); - this_slot = now & STTIMER_SLOTTIMEMASK; - - spin_lock(&stt_data.stt_lock); - - while (cfs_time_aftereq(this_slot, *last)) { - expired += stt_expire_list(STTIMER_SLOT(this_slot), now); - this_slot = cfs_time_sub(this_slot, STTIMER_SLOTTIME); - } - - *last = now & STTIMER_SLOTTIMEMASK; - spin_unlock(&stt_data.stt_lock); - return expired; -} - -#ifdef __KERNEL__ - -int -stt_timer_main (void *arg) -{ - UNUSED(arg); - - cfs_daemonize("st_timer"); - cfs_block_allsigs(); - - while (!stt_data.stt_shuttingdown) { - stt_check_timers(&stt_data.stt_prev_slot); - - cfs_waitq_wait_event_timeout(stt_data.stt_waitq, - stt_data.stt_shuttingdown, - cfs_time_seconds(STTIMER_SLOTTIME)); - } - - spin_lock(&stt_data.stt_lock); - stt_data.stt_nthreads--; - spin_unlock(&stt_data.stt_lock); - return 0; -} - -int -stt_start_timer_thread (void) -{ - long pid; - - LASSERT (!stt_data.stt_shuttingdown); - - pid = cfs_kernel_thread(stt_timer_main, NULL, 0); - if (pid < 0) - return (int)pid; - - spin_lock(&stt_data.stt_lock); - 
stt_data.stt_nthreads++; - spin_unlock(&stt_data.stt_lock); - return 0; -} - -#else /* !__KERNEL__ */ - -int -stt_check_events (void) -{ - return stt_check_timers(&stt_data.stt_prev_slot); -} - -int -stt_poll_interval (void) -{ - return STTIMER_SLOTTIME; -} - -#endif - -int -stt_startup (void) -{ - int rc = 0; - int i; - - stt_data.stt_shuttingdown = 0; - stt_data.stt_prev_slot = cfs_time_current_sec() & STTIMER_SLOTTIMEMASK; - - spin_lock_init(&stt_data.stt_lock); - for (i = 0; i < STTIMER_NSLOTS; i++) - CFS_INIT_LIST_HEAD(&stt_data.stt_hash[i]); - -#ifdef __KERNEL__ - stt_data.stt_nthreads = 0; - cfs_waitq_init(&stt_data.stt_waitq); - rc = stt_start_timer_thread(); - if (rc != 0) - CERROR ("Can't spawn timer thread: %d\n", rc); -#endif - - return rc; -} - -void -stt_shutdown (void) -{ - int i; - - spin_lock(&stt_data.stt_lock); - - for (i = 0; i < STTIMER_NSLOTS; i++) - LASSERT (list_empty(&stt_data.stt_hash[i])); - - stt_data.stt_shuttingdown = 1; - -#ifdef __KERNEL__ - cfs_waitq_signal(&stt_data.stt_waitq); - lst_wait_until(stt_data.stt_nthreads == 0, stt_data.stt_lock, - "waiting for %d threads to terminate\n", - stt_data.stt_nthreads); -#endif - - spin_unlock(&stt_data.stt_lock); - return; -} diff --git a/lnet/selftest/timer.h b/lnet/selftest/timer.h deleted file mode 100644 index c88027ce2c142f1dab55a6bba4cab3bb01456786..0000000000000000000000000000000000000000 --- a/lnet/selftest/timer.h +++ /dev/null @@ -1,23 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * Author: Isaac Huang <isaac@clusterfs.com> - * - */ -#ifndef __SELFTEST_TIMER_H__ -#define __SELFTEST_TIMER_H__ - -typedef struct { - struct list_head stt_list; - cfs_time_t stt_expires; - void (*stt_func) (void *); - void *stt_data; -} stt_timer_t; - -void stt_add_timer (stt_timer_t *timer); -int stt_del_timer (stt_timer_t *timer); -int stt_startup (void); -void stt_shutdown (void); - -#endif /* __SELFTEST_TIMER_H__ */ diff --git a/lnet/selftest/workitem.c b/lnet/selftest/workitem.c deleted file mode 100644 index 789da8d01e99a192f5fbdf3fd78a44fe10b92e5a..0000000000000000000000000000000000000000 --- a/lnet/selftest/workitem.c +++ /dev/null @@ -1,340 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Isaac Huang <isaac@clusterfs.com> - * - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - - -struct smoketest_workitem { - struct list_head wi_runq; /* concurrent workitems */ - struct list_head wi_serial_runq; /* serialised workitems */ - cfs_waitq_t wi_waitq; /* where schedulers sleep */ - cfs_waitq_t wi_serial_waitq; /* where serial scheduler sleep */ - spinlock_t wi_lock; /* serialize */ - int wi_shuttingdown; - int wi_nthreads; -} swi_data; - -static inline int -swi_sched_cansleep (struct list_head *q) -{ - int rc; - - spin_lock(&swi_data.wi_lock); - - rc = !swi_data.wi_shuttingdown && list_empty(q); - - spin_unlock(&swi_data.wi_lock); - return rc; -} - -/* XXX: - * 0. it only works when called from wi->wi_action. - * 1. when it returns no one shall try to schedule the workitem. 
- */ -void -swi_kill_workitem (swi_workitem_t *wi) -{ - LASSERT (!in_interrupt()); /* because we use plain spinlock */ - LASSERT (!swi_data.wi_shuttingdown); - - spin_lock(&swi_data.wi_lock); - -#ifdef __KERNEL__ - LASSERT (wi->wi_running); -#endif - - if (wi->wi_scheduled) { /* cancel pending schedules */ - LASSERT (!list_empty(&wi->wi_list)); - list_del_init(&wi->wi_list); - } - - LASSERT (list_empty(&wi->wi_list)); - wi->wi_scheduled = 1; /* LBUG future schedule attempts */ - - spin_unlock(&swi_data.wi_lock); - return; -} - -void -swi_schedule_workitem (swi_workitem_t *wi) -{ - LASSERT (!in_interrupt()); /* because we use plain spinlock */ - LASSERT (!swi_data.wi_shuttingdown); - - spin_lock(&swi_data.wi_lock); - - if (!wi->wi_scheduled) { - LASSERT (list_empty(&wi->wi_list)); - - wi->wi_scheduled = 1; - list_add_tail(&wi->wi_list, &swi_data.wi_runq); - cfs_waitq_signal(&swi_data.wi_waitq); - } - - LASSERT (!list_empty(&wi->wi_list)); - spin_unlock(&swi_data.wi_lock); - return; -} - -/* - * Workitem scheduled by this function is strictly serialised not only with - * itself, but also with others scheduled this way. - * - * Now there's only one static serialised queue, but in the future more might - * be added, and even dynamic creation of serialised queues might be supported. 
- */ -void -swi_schedule_serial_workitem (swi_workitem_t *wi) -{ - LASSERT (!in_interrupt()); /* because we use plain spinlock */ - LASSERT (!swi_data.wi_shuttingdown); - - spin_lock(&swi_data.wi_lock); - - if (!wi->wi_scheduled) { - LASSERT (list_empty(&wi->wi_list)); - - wi->wi_scheduled = 1; - list_add_tail(&wi->wi_list, &swi_data.wi_serial_runq); - cfs_waitq_signal(&swi_data.wi_serial_waitq); - } - - LASSERT (!list_empty(&wi->wi_list)); - spin_unlock(&swi_data.wi_lock); - return; -} - -#ifdef __KERNEL__ - -int -swi_scheduler_main (void *arg) -{ - int id = (long) arg; - char name[16]; - - snprintf(name, sizeof(name), "swi_sd%03d", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - spin_lock(&swi_data.wi_lock); - - while (!swi_data.wi_shuttingdown) { - int nloops = 0; - int rc; - swi_workitem_t *wi; - - while (!list_empty(&swi_data.wi_runq) && - nloops < SWI_RESCHED) { - wi = list_entry(swi_data.wi_runq.next, - swi_workitem_t, wi_list); - list_del_init(&wi->wi_list); - - LASSERT (wi->wi_scheduled); - - nloops++; - if (wi->wi_running) { - list_add_tail(&wi->wi_list, &swi_data.wi_runq); - continue; - } - - wi->wi_running = 1; - wi->wi_scheduled = 0; - spin_unlock(&swi_data.wi_lock); - - rc = (*wi->wi_action) (wi); - - spin_lock(&swi_data.wi_lock); - if (rc == 0) /* wi still active */ - wi->wi_running = 0; - } - - spin_unlock(&swi_data.wi_lock); - - if (nloops < SWI_RESCHED) - wait_event_interruptible_exclusive( - swi_data.wi_waitq, - !swi_sched_cansleep(&swi_data.wi_runq)); - else - our_cond_resched(); - - spin_lock(&swi_data.wi_lock); - } - - swi_data.wi_nthreads--; - spin_unlock(&swi_data.wi_lock); - return 0; -} - -int -swi_serial_scheduler_main (void *arg) -{ - UNUSED (arg); - - cfs_daemonize("swi_serial_sd"); - cfs_block_allsigs(); - - spin_lock(&swi_data.wi_lock); - - while (!swi_data.wi_shuttingdown) { - int nloops = 0; - int rc; - swi_workitem_t *wi; - - while (!list_empty(&swi_data.wi_serial_runq) && - nloops < SWI_RESCHED) { - wi = 
list_entry(swi_data.wi_serial_runq.next, - swi_workitem_t, wi_list); - list_del_init(&wi->wi_list); - - LASSERT (!wi->wi_running); - LASSERT (wi->wi_scheduled); - - nloops++; - wi->wi_running = 1; - wi->wi_scheduled = 0; - spin_unlock(&swi_data.wi_lock); - - rc = (*wi->wi_action) (wi); - - spin_lock(&swi_data.wi_lock); - if (rc == 0) /* wi still active */ - wi->wi_running = 0; - } - - spin_unlock(&swi_data.wi_lock); - - if (nloops < SWI_RESCHED) - wait_event_interruptible_exclusive( - swi_data.wi_serial_waitq, - !swi_sched_cansleep(&swi_data.wi_serial_runq)); - else - our_cond_resched(); - - spin_lock(&swi_data.wi_lock); - } - - swi_data.wi_nthreads--; - spin_unlock(&swi_data.wi_lock); - return 0; -} - -int -swi_start_thread (int (*func) (void*), void *arg) -{ - long pid; - - LASSERT (!swi_data.wi_shuttingdown); - - pid = cfs_kernel_thread(func, arg, 0); - if (pid < 0) - return (int)pid; - - spin_lock(&swi_data.wi_lock); - swi_data.wi_nthreads++; - spin_unlock(&swi_data.wi_lock); - return 0; -} - -#else /* __KERNEL__ */ - -int -swi_check_events (void) -{ - int n = 0; - swi_workitem_t *wi; - struct list_head *q; - - spin_lock(&swi_data.wi_lock); - - for (;;) { - if (!list_empty(&swi_data.wi_serial_runq)) - q = &swi_data.wi_serial_runq; - else if (!list_empty(&swi_data.wi_runq)) - q = &swi_data.wi_runq; - else - break; - - wi = list_entry(q->next, swi_workitem_t, wi_list); - list_del_init(&wi->wi_list); - - LASSERT (wi->wi_scheduled); - wi->wi_scheduled = 0; - spin_unlock(&swi_data.wi_lock); - - n++; - (*wi->wi_action) (wi); - - spin_lock(&swi_data.wi_lock); - } - - spin_unlock(&swi_data.wi_lock); - return n; -} - -#endif - -int -swi_startup (void) -{ - int i; - int rc; - - swi_data.wi_nthreads = 0; - swi_data.wi_shuttingdown = 0; - spin_lock_init(&swi_data.wi_lock); - cfs_waitq_init(&swi_data.wi_waitq); - cfs_waitq_init(&swi_data.wi_serial_waitq); - CFS_INIT_LIST_HEAD(&swi_data.wi_runq); - CFS_INIT_LIST_HEAD(&swi_data.wi_serial_runq); - -#ifdef __KERNEL__ - rc = 
swi_start_thread(swi_serial_scheduler_main, NULL); - if (rc != 0) { - LASSERT (swi_data.wi_nthreads == 0); - CERROR ("Can't spawn serial workitem scheduler: %d\n", rc); - return rc; - } - - for (i = 0; i < num_online_cpus(); i++) { - rc = swi_start_thread(swi_scheduler_main, (void *) (long) i); - if (rc != 0) { - CERROR ("Can't spawn workitem scheduler: %d\n", rc); - swi_shutdown(); - return rc; - } - } -#else - UNUSED(i); - UNUSED(rc); -#endif - - return 0; -} - -void -swi_shutdown (void) -{ - spin_lock(&swi_data.wi_lock); - - LASSERT (list_empty(&swi_data.wi_runq)); - LASSERT (list_empty(&swi_data.wi_serial_runq)); - - swi_data.wi_shuttingdown = 1; - -#ifdef __KERNEL__ - cfs_waitq_broadcast(&swi_data.wi_waitq); - cfs_waitq_broadcast(&swi_data.wi_serial_waitq); - lst_wait_until(swi_data.wi_nthreads == 0, swi_data.wi_lock, - "waiting for %d threads to terminate\n", - swi_data.wi_nthreads); -#endif - - spin_unlock(&swi_data.wi_lock); - return; -} diff --git a/lnet/ulnds/.cvsignore b/lnet/ulnds/.cvsignore deleted file mode 100644 index 2711a44afcdfbbe0500e2607c084999be5d87f9e..0000000000000000000000000000000000000000 --- a/lnet/ulnds/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -.deps -Makefile -autoMakefile -autoMakefile.in diff --git a/lnet/ulnds/Makefile.in b/lnet/ulnds/Makefile.in deleted file mode 100644 index 78432ee60aa2f30a99ddc3633676ada906c609fd..0000000000000000000000000000000000000000 --- a/lnet/ulnds/Makefile.in +++ /dev/null @@ -1,5 +0,0 @@ -@BUILD_USOCKLND_TRUE@subdir-m += socklnd -@BUILD_UPTLLND_TRUE@subdir-m += ptllnd - -@INCLUDE_RULES@ - diff --git a/lnet/ulnds/autoMakefile.am b/lnet/ulnds/autoMakefile.am deleted file mode 100644 index 0e7fa4c2d8b3cca9a658366bfe0cca91cbc99a52..0000000000000000000000000000000000000000 --- a/lnet/ulnds/autoMakefile.am +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -SUBDIRS = socklnd ptllnd diff --git a/lnet/ulnds/ptllnd/.cvsignore b/lnet/ulnds/ptllnd/.cvsignore deleted file mode 100644 index e9955884756af11fe171e89bf99e459ac44f1a2a..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/ulnds/ptllnd/Makefile.am b/lnet/ulnds/ptllnd/Makefile.am deleted file mode 100644 index e48cb85fa21073d8bc0ba7e515b97e0a33110fd2..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ - -if BUILD_UPTLLND -if LIBLUSTRE -noinst_LIBRARIES = libptllnd.a -noinst_HEADERS = ptllnd.h -libptllnd_a_SOURCES = ptllnd.h ptllnd.c ptllnd_cb.c -libptllnd_a_CPPFLAGS= $(LLCPPFLAGS) -# I need $(PTLNDCPPLFLAGS) to be AFTER $(CPPFLAGS) -# Adding them into $(AM_CFLAGS) seems wrong, but lets me get on.. -libptllnd_a_CFLAGS= $(PTLLNDCPPFLAGS) $(LLCFLAGS) -endif -endif diff --git a/lnet/ulnds/ptllnd/ptllnd.c b/lnet/ulnds/ptllnd/ptllnd.c deleted file mode 100644 index ef882a1cedb3b0aa6dd08f0f1947228472883da9..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/ptllnd.c +++ /dev/null @@ -1,834 +0,0 @@ - -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: Eric Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - -#include "ptllnd.h" - -lnd_t the_ptllnd = { - .lnd_type = PTLLND, - .lnd_startup = ptllnd_startup, - .lnd_shutdown = ptllnd_shutdown, - .lnd_ctl = ptllnd_ctl, - .lnd_send = ptllnd_send, - .lnd_recv = ptllnd_recv, - .lnd_eager_recv = ptllnd_eager_recv, - .lnd_notify = ptllnd_notify, - .lnd_wait = ptllnd_wait, - .lnd_setasync = ptllnd_setasync, -}; - -static int ptllnd_ni_count = 0; - -static struct list_head ptllnd_idle_history; -static struct list_head ptllnd_history_list; - -void -ptllnd_history_fini(void) -{ - ptllnd_he_t *he; - - while (!list_empty(&ptllnd_idle_history)) { - he = list_entry(ptllnd_idle_history.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - LIBCFS_FREE(he, sizeof(*he)); - } - - while (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - LIBCFS_FREE(he, sizeof(*he)); - } -} - -int -ptllnd_history_init(void) -{ - int i; - ptllnd_he_t *he; - int n; - int rc; - - CFS_INIT_LIST_HEAD(&ptllnd_idle_history); - CFS_INIT_LIST_HEAD(&ptllnd_history_list); - - rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0); - if (rc != 0) - return rc; - - for (i = 0; i < n; i++) { - LIBCFS_ALLOC(he, sizeof(*he)); - if (he == NULL) { - ptllnd_history_fini(); - return -ENOMEM; - } - - list_add(&he->he_list, &ptllnd_idle_history); - } - - PTLLND_HISTORY("Init"); - - return 0; -} - -void -ptllnd_history(const char *fn, const char *file, const int line, - const char *fmt, ...) 
-{ - static int seq; - - va_list ap; - ptllnd_he_t *he; - - if (!list_empty(&ptllnd_idle_history)) { - he = list_entry(ptllnd_idle_history.next, - ptllnd_he_t, he_list); - } else if (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - } else { - return; - } - - list_del(&he->he_list); - list_add_tail(&he->he_list, &ptllnd_history_list); - - he->he_seq = seq++; - he->he_fn = fn; - he->he_file = file; - he->he_line = line; - gettimeofday(&he->he_time, NULL); - - va_start(ap, fmt); - vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap); - va_end(ap); -} - -void -ptllnd_dump_history(void) -{ - ptllnd_he_t *he; - - PTLLND_HISTORY("dumping..."); - - while (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - - CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq, - (int)he->he_time.tv_sec, (int)he->he_time.tv_usec, - he->he_file, he->he_line, he->he_fn, he->he_msg); - - list_add_tail(&he->he_list, &ptllnd_idle_history); - } - - PTLLND_HISTORY("complete"); -} - -void -ptllnd_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU - * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */ - - - /* Constants... 
*/ - CLASSERT (PTL_RESERVED_MATCHBITS == 0x100); - CLASSERT (LNET_MSG_MATCHBITS == 0); - CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E); - CLASSERT (PTLLND_MSG_VERSION == 0x04); - CLASSERT (PTLLND_RDMA_OK == 0x00); - CLASSERT (PTLLND_RDMA_FAIL == 0x01); - CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00); - CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01); - CLASSERT (PTLLND_MSG_TYPE_GET == 0x02); - CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03); - CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04); - CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05); - CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06); - - /* Checks for struct kptl_msg_t */ - CLASSERT ((int)sizeof(kptl_msg_t) == 136); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4); - CLASSERT 
((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12); - - /* Checks for struct kptl_immediate_msg_t */ - CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1); - - /* Checks for struct kptl_rdma_msg_t */ - CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8); - - /* Checks for struct kptl_hello_msg_t */ - CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4); -} - -int -ptllnd_parse_int_tunable(int *value, char *name, int dflt) -{ - char *env = getenv(name); - char *end; - - if (env == NULL) { - *value = dflt; - return 0; - } - - *value = strtoull(env, &end, 0); - if (*end == 0) - return 0; - - CERROR("Can't parse tunable %s=%s\n", name, env); - return -EINVAL; -} - -int -ptllnd_get_tunables(lnet_ni_t *ni) -{ 
- ptllnd_ni_t *plni = ni->ni_data; - int max_msg_size; - int msgs_per_buffer; - int rc; - int temp; - - /* Other tunable defaults depend on this */ - rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_portal, - "PTLLND_PORTAL", PTLLND_PORTAL); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&temp, - "PTLLND_PID", PTLLND_PID); - if (rc != 0) - return rc; - plni->plni_ptllnd_pid = (ptl_pid_t)temp; - - rc = ptllnd_parse_int_tunable(&plni->plni_peer_credits, - "PTLLND_PEERCREDITS", PTLLND_PEERCREDITS); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&max_msg_size, - "PTLLND_MAX_MSG_SIZE", - PTLLND_MAX_ULND_MSG_SIZE); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&msgs_per_buffer, - "PTLLND_MSGS_PER_BUFFER", 64); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_msgs_spare, - "PTLLND_MSGS_SPARE", 256); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_peer_hash_size, - "PTLLND_PEER_HASH_SIZE", 101); - if (rc != 0) - return rc; - - - rc = ptllnd_parse_int_tunable(&plni->plni_eq_size, - "PTLLND_EQ_SIZE", 1024); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_checksum, - "PTLLND_CHECKSUM", 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history, - "PTLLND_TX_HISTORY", - plni->plni_debug ? 
1024 : 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch, - "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak, - "PTLLND_ABORT_ON_NAK", 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak, - "PTLLND_DUMP_ON_NAK", plni->plni_debug); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval, - "PTLLND_WATCHDOG_INTERVAL", 1); - if (rc != 0) - return rc; - if (plni->plni_watchdog_interval <= 0) - plni->plni_watchdog_interval = 1; - - rc = ptllnd_parse_int_tunable(&plni->plni_timeout, - "PTLLND_TIMEOUT", 50); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_long_wait, - "PTLLND_LONG_WAIT", - plni->plni_debug ? 5 : plni->plni_timeout); - if (rc != 0) - return rc; - plni->plni_long_wait *= 1000; /* convert to mS */ - - plni->plni_max_msg_size = max_msg_size & ~7; - if (plni->plni_max_msg_size < PTLLND_MIN_BUFFER_SIZE) - plni->plni_max_msg_size = PTLLND_MIN_BUFFER_SIZE; - CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0); - CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE); - - plni->plni_buffer_size = plni->plni_max_msg_size * msgs_per_buffer; - - CDEBUG(D_NET, "portal = %d\n",plni->plni_portal); - CDEBUG(D_NET, "ptllnd_pid = %d\n",plni->plni_ptllnd_pid); - CDEBUG(D_NET, "max_msg_size = %d\n",max_msg_size); - CDEBUG(D_NET, "msgs_per_buffer = %d\n",msgs_per_buffer); - CDEBUG(D_NET, "msgs_spare = %d\n",plni->plni_msgs_spare); - CDEBUG(D_NET, "peer_hash_size = %d\n",plni->plni_peer_hash_size); - CDEBUG(D_NET, "eq_size = %d\n",plni->plni_eq_size); - CDEBUG(D_NET, "max_msg_size = %d\n",plni->plni_max_msg_size); - CDEBUG(D_NET, "buffer_size = %d\n",plni->plni_buffer_size); - - return 0; -} - -ptllnd_buffer_t * -ptllnd_create_buffer (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_buffer_t *buf; - - LIBCFS_ALLOC(buf, sizeof(*buf)); - if 
(buf == NULL) { - CERROR("Can't allocate buffer descriptor\n"); - return NULL; - } - - buf->plb_ni = ni; - buf->plb_posted = 0; - CFS_INIT_LIST_HEAD(&buf->plb_list); - - LIBCFS_ALLOC(buf->plb_buffer, plni->plni_buffer_size); - if (buf->plb_buffer == NULL) { - CERROR("Can't allocate buffer size %d\n", - plni->plni_buffer_size); - LIBCFS_FREE(buf, sizeof(*buf)); - return NULL; - } - - list_add(&buf->plb_list, &plni->plni_buffers); - plni->plni_nbuffers++; - - return buf; -} - -void -ptllnd_destroy_buffer (ptllnd_buffer_t *buf) -{ - ptllnd_ni_t *plni = buf->plb_ni->ni_data; - - LASSERT (!buf->plb_posted); - - plni->plni_nbuffers--; - list_del(&buf->plb_list); - LIBCFS_FREE(buf->plb_buffer, plni->plni_buffer_size); - LIBCFS_FREE(buf, sizeof(*buf)); -} - -int -ptllnd_size_buffers (lnet_ni_t *ni, int delta) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_buffer_t *buf; - int nmsgs; - int nbufs; - int rc; - - CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers); - - plni->plni_nmsgs += delta; - LASSERT(plni->plni_nmsgs >= 0); - - nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare; - - nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) / - plni->plni_buffer_size; - - while (nbufs > plni->plni_nbuffers) { - buf = ptllnd_create_buffer(ni); - - if (buf == NULL) - return -ENOMEM; - - rc = ptllnd_post_buffer(buf); - if (rc != 0) { - /* TODO - this path seems to orpahn the buffer - * in a state where its not posted and will never be - * However it does not leak the buffer as it's - * already been put onto the global buffer list - * and will be cleaned up - */ - return rc; - } - } - - CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers); - return 0; -} - -void -ptllnd_destroy_buffers (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_buffer_t *buf; - struct list_head *tmp; - 
struct list_head *nxt; - - CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers); - - list_for_each_safe(tmp, nxt, &plni->plni_buffers) { - buf = list_entry(tmp, ptllnd_buffer_t, plb_list); - - //CDEBUG(D_NET, "buf=%p posted=%d\n",buf,buf->plb_posted); - - LASSERT (plni->plni_nbuffers > 0); - if (buf->plb_posted) { - time_t start = cfs_time_current_sec(); - int w = plni->plni_long_wait; - - LASSERT (plni->plni_nposted_buffers > 0); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - (void) PtlMDUnlink(buf->plb_md); - - while (buf->plb_posted) { - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds to unlink buffer\n", - (int)(cfs_time_current_sec() - start)); - w *= 2; - } - ptllnd_wait(ni, w); - } -#else - while (buf->plb_posted) { - rc = PtlMDUnlink(buf->plb_md); - if (rc == PTL_OK) { - buf->plb_posted = 0; - plni->plni_nposted_buffers--; - break; - } - LASSERT (rc == PTL_MD_IN_USE); - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds to unlink buffer\n", - cfs_time_current_sec() - start); - w *= 2; - } - ptllnd_wait(ni, w); - } -#endif - } - ptllnd_destroy_buffer(buf); - } - - CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers); - - LASSERT (plni->plni_nposted_buffers == 0); - LASSERT (plni->plni_nbuffers == 0); -} - -int -ptllnd_create_peer_hash (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - int i; - - plni->plni_npeers = 0; - - LIBCFS_ALLOC(plni->plni_peer_hash, - plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash)); - if (plni->plni_peer_hash == NULL) { - CERROR("Can't allocate ptllnd peer hash (size %d)\n", - plni->plni_peer_hash_size); - return -ENOMEM; - } - - for (i = 0; i < plni->plni_peer_hash_size; i++) - CFS_INIT_LIST_HEAD(&plni->plni_peer_hash[i]); - - return 0; -} - -void -ptllnd_destroy_peer_hash (lnet_ni_t *ni) -{ - 
ptllnd_ni_t *plni = ni->ni_data; - int i; - - LASSERT( plni->plni_npeers == 0); - - for (i = 0; i < plni->plni_peer_hash_size; i++) - LASSERT (list_empty(&plni->plni_peer_hash[i])); - - LIBCFS_FREE(plni->plni_peer_hash, - plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash)); -} - -void -ptllnd_close_peers (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_peer_t *plp; - int i; - - for (i = 0; i < plni->plni_peer_hash_size; i++) - while (!list_empty(&plni->plni_peer_hash[i])) { - plp = list_entry(plni->plni_peer_hash[i].next, - ptllnd_peer_t, plp_list); - - ptllnd_close_peer(plp, 0); - } -} - -int -ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - switch (cmd) { - case IOC_LIBCFS_DEBUG_PEER: - ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg)); - return 0; - - default: - return -EINVAL; - } -} - -__u64 -ptllnd_get_timestamp(void) -{ - struct timeval tv; - int rc = gettimeofday(&tv, NULL); - - LASSERT (rc == 0); - return ((__u64)tv.tv_sec) * 1000000 + tv.tv_usec; -} - -void -ptllnd_shutdown (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - int rc; - time_t start = cfs_time_current_sec(); - int w = plni->plni_long_wait; - - LASSERT (ptllnd_ni_count == 1); - plni->plni_max_tx_history = 0; - - ptllnd_cull_tx_history(plni); - - ptllnd_close_peers(ni); - ptllnd_destroy_buffers(ni); - - while (plni->plni_npeers > 0) { - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds for peers to shutdown\n", - (int)(cfs_time_current_sec() - start)); - w *= 2; - } - ptllnd_wait(ni, w); - } - - LASSERT (plni->plni_ntxs == 0); - LASSERT (plni->plni_nrxs == 0); - - rc = PtlEQFree(plni->plni_eqh); - LASSERT (rc == PTL_OK); - - rc = PtlNIFini(plni->plni_nih); - LASSERT (rc == PTL_OK); - - ptllnd_destroy_peer_hash(ni); - LIBCFS_FREE(plni, sizeof(*plni)); - ptllnd_ni_count--; -} - -int -ptllnd_startup (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni; - int rc; - - /* could get limits from portals I guess... 
*/ - ni->ni_maxtxcredits = - ni->ni_peertxcredits = 1000; - - if (ptllnd_ni_count != 0) { - CERROR("Can't have > 1 instance of ptllnd\n"); - return -EPERM; - } - - ptllnd_ni_count++; - - rc = ptllnd_history_init(); - if (rc != 0) { - CERROR("Can't init history\n"); - goto failed0; - } - - LIBCFS_ALLOC(plni, sizeof(*plni)); - if (plni == NULL) { - CERROR("Can't allocate ptllnd state\n"); - rc = -ENOMEM; - goto failed0; - } - - ni->ni_data = plni; - - plni->plni_stamp = ptllnd_get_timestamp(); - plni->plni_nrxs = 0; - plni->plni_ntxs = 0; - plni->plni_ntx_history = 0; - plni->plni_watchdog_peeridx = 0; - plni->plni_watchdog_nextt = cfs_time_current_sec(); - CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs); - CFS_INIT_LIST_HEAD(&plni->plni_tx_history); - - /* - * Initilize buffer related data structures - */ - CFS_INIT_LIST_HEAD(&plni->plni_buffers); - plni->plni_nbuffers = 0; - plni->plni_nposted_buffers = 0; - - rc = ptllnd_get_tunables(ni); - if (rc != 0) - goto failed1; - - rc = ptllnd_create_peer_hash(ni); - if (rc != 0) - goto failed1; - - /* NB I most probably won't get the PID I requested here. It doesn't - * matter because I don't need a fixed PID (only connection acceptors - * need a "well known" PID). */ - - rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid, - NULL, NULL, &plni->plni_nih); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CERROR("PtlNIInit failed: %s(%d)\n", - ptllnd_errtype2str(rc), rc); - rc = -ENODEV; - goto failed2; - } - - rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size, - PTL_EQ_HANDLER_NONE, &plni->plni_eqh); - if (rc != PTL_OK) { - CERROR("PtlEQAlloc failed: %s(%d)\n", - ptllnd_errtype2str(rc), rc); - rc = -ENODEV; - goto failed3; - } - - /* - * Fetch the Portals NID - */ - rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id); - if (rc != PTL_OK) { - CERROR ("PtlGetID failed : %s(%d)\n", - ptllnd_errtype2str(rc), rc); - rc = -EINVAL; - goto failed4; - } - - /* - * Create the new NID. 
Based on the LND network type - * and the lower ni's address data. - */ - ni->ni_nid = ptllnd_ptl2lnetnid(ni, plni->plni_portals_id.nid); - - CDEBUG(D_NET, "ptl id =%s\n", ptllnd_ptlid2str(plni->plni_portals_id)); - CDEBUG(D_NET, "lnet id =%s (passed back)\n", - libcfs_id2str((lnet_process_id_t) { - .nid = ni->ni_nid, .pid = the_lnet.ln_pid})); - - rc = ptllnd_size_buffers(ni, 0); - if (rc != 0) - goto failed4; - - return 0; - - failed4: - ptllnd_destroy_buffers(ni); - PtlEQFree(plni->plni_eqh); - failed3: - PtlNIFini(plni->plni_nih); - failed2: - ptllnd_destroy_peer_hash(ni); - failed1: - LIBCFS_FREE(plni, sizeof(*plni)); - failed0: - ptllnd_history_fini(); - ptllnd_ni_count--; - CDEBUG(D_NET, "<<< rc=%d\n",rc); - return rc; -} - -const char *ptllnd_evtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_EVENT_GET_START); - DO_TYPE(PTL_EVENT_GET_END); - DO_TYPE(PTL_EVENT_PUT_START); - DO_TYPE(PTL_EVENT_PUT_END); - DO_TYPE(PTL_EVENT_REPLY_START); - DO_TYPE(PTL_EVENT_REPLY_END); - DO_TYPE(PTL_EVENT_ACK); - DO_TYPE(PTL_EVENT_SEND_START); - DO_TYPE(PTL_EVENT_SEND_END); - DO_TYPE(PTL_EVENT_UNLINK); - default: - return "<unknown event type>"; - } -#undef DO_TYPE -} - -const char *ptllnd_msgtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTLLND_MSG_TYPE_INVALID); - DO_TYPE(PTLLND_MSG_TYPE_PUT); - DO_TYPE(PTLLND_MSG_TYPE_GET); - DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE); - DO_TYPE(PTLLND_MSG_TYPE_HELLO); - DO_TYPE(PTLLND_MSG_TYPE_NOOP); - DO_TYPE(PTLLND_MSG_TYPE_NAK); - default: - return "<unknown msg type>"; - } -#undef DO_TYPE -} - -const char *ptllnd_errtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_OK); - DO_TYPE(PTL_SEGV); - DO_TYPE(PTL_NO_SPACE); - DO_TYPE(PTL_ME_IN_USE); - DO_TYPE(PTL_NAL_FAILED); - DO_TYPE(PTL_NO_INIT); - DO_TYPE(PTL_IFACE_DUP); - DO_TYPE(PTL_IFACE_INVALID); - DO_TYPE(PTL_HANDLE_INVALID); - DO_TYPE(PTL_MD_INVALID); - 
DO_TYPE(PTL_ME_INVALID); - DO_TYPE(PTL_PROCESS_INVALID); - DO_TYPE(PTL_PT_INDEX_INVALID); - DO_TYPE(PTL_SR_INDEX_INVALID); - DO_TYPE(PTL_EQ_INVALID); - DO_TYPE(PTL_EQ_DROPPED); - DO_TYPE(PTL_EQ_EMPTY); - DO_TYPE(PTL_MD_NO_UPDATE); - DO_TYPE(PTL_FAIL); - DO_TYPE(PTL_AC_INDEX_INVALID); - DO_TYPE(PTL_MD_ILLEGAL); - DO_TYPE(PTL_ME_LIST_TOO_LONG); - DO_TYPE(PTL_MD_IN_USE); - DO_TYPE(PTL_NI_INVALID); - DO_TYPE(PTL_PID_INVALID); - DO_TYPE(PTL_PT_FULL); - DO_TYPE(PTL_VAL_FAILED); - DO_TYPE(PTL_NOT_IMPLEMENTED); - DO_TYPE(PTL_NO_ACK); - DO_TYPE(PTL_EQ_IN_USE); - DO_TYPE(PTL_PID_IN_USE); - DO_TYPE(PTL_INV_EQ_SIZE); - DO_TYPE(PTL_AGAIN); - default: - return "<unknown error type>"; - } -#undef DO_TYPE -} diff --git a/lnet/ulnds/ptllnd/ptllnd.h b/lnet/ulnds/ptllnd/ptllnd.h deleted file mode 100644 index 3cd83b99970ab4435cd3bd76fb586e605d006d8e..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/ptllnd.h +++ /dev/null @@ -1,274 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: Eric Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - - -#define DEBUG_SUBSYSTEM S_LND - -#include <lnet/lib-lnet.h> -#include <lnet/ptllnd_wire.h> - -#include <portals/p30.h> -#include <lnet/ptllnd.h> /* Depends on portals/p30.h */ -#include <stdarg.h> - -/* Hack to record history - * This should really be done by CDEBUG(D_NETTRACE... 
*/ - -typedef struct { - struct list_head he_list; - struct timeval he_time; - const char *he_fn; - const char *he_file; - int he_seq; - int he_line; - char he_msg[80]; -} ptllnd_he_t; - -void ptllnd_dump_history(); -void ptllnd_history(const char *fn, const char *file, const int line, - const char *fmt, ...); -#define PTLLND_HISTORY(fmt, a...) \ - ptllnd_history(__FUNCTION__, __FILE__, __LINE__, fmt, ## a) - - -#define PTLLND_MD_OPTIONS (PTL_MD_LUSTRE_COMPLETION_SEMANTICS |\ - PTL_MD_EVENT_START_DISABLE) -typedef struct -{ - int plni_portal; - ptl_pid_t plni_ptllnd_pid; /* Portals PID of peers I may connect to */ - int plni_peer_credits; - int plni_max_msg_size; - int plni_buffer_size; - int plni_msgs_spare; - int plni_peer_hash_size; - int plni_eq_size; - int plni_checksum; - int plni_max_tx_history; - int plni_abort_on_protocol_mismatch; - int plni_abort_on_nak; - int plni_dump_on_nak; - int plni_debug; - int plni_long_wait; - int plni_watchdog_interval; - int plni_timeout; - - __u64 plni_stamp; - struct list_head plni_active_txs; - struct list_head plni_zombie_txs; - int plni_ntxs; - int plni_nrxs; - - ptl_handle_ni_t plni_nih; - ptl_handle_eq_t plni_eqh; - ptl_process_id_t plni_portals_id; /* Portals ID of interface */ - - struct list_head *plni_peer_hash; - int plni_npeers; - - int plni_watchdog_nextt; - int plni_watchdog_peeridx; - - struct list_head plni_tx_history; - int plni_ntx_history; - - struct list_head plni_buffers; - int plni_nbuffers; - int plni_nposted_buffers; - int plni_nmsgs; -} ptllnd_ni_t; - -#define PTLLND_CREDIT_HIGHWATER(plni) ((plni)->plni_peer_credits - 1) - -typedef struct -{ - struct list_head plp_list; - lnet_ni_t *plp_ni; - lnet_process_id_t plp_id; - ptl_process_id_t plp_ptlid; - int plp_credits; /* # msg buffers reserved for me at peer */ - - /* credits for msg buffers I've posted for this peer... 
- * outstanding - free buffers I've still to inform my peer about - * sent - free buffers I've told my peer about - * lazy - additional buffers (over and above plni_peer_credits) - * posted to prevent peer blocking on sending a non-RDMA - * messages to me when LNET isn't eagerly responsive to - * the network (i.e. liblustre doesn't have control). - * extra_lazy - lazy credits not required any more. */ - int plp_outstanding_credits; - int plp_sent_credits; - int plp_lazy_credits; - int plp_extra_lazy_credits; - - int plp_max_msg_size; - int plp_refcount; - int plp_recvd_hello:1; - int plp_closing:1; - __u64 plp_match; - __u64 plp_stamp; - struct list_head plp_txq; - struct list_head plp_activeq; -} ptllnd_peer_t; - -typedef struct -{ - struct list_head plb_list; - lnet_ni_t *plb_ni; - int plb_posted; - ptl_handle_md_t plb_md; - char *plb_buffer; -} ptllnd_buffer_t; - -typedef struct -{ - ptllnd_peer_t *rx_peer; - kptl_msg_t *rx_msg; - int rx_nob; -} ptllnd_rx_t; - -typedef struct -{ - struct list_head tx_list; - int tx_type; - int tx_status; - ptllnd_peer_t *tx_peer; - lnet_msg_t *tx_lnetmsg; - lnet_msg_t *tx_lnetreplymsg; - unsigned int tx_niov; - ptl_md_iovec_t *tx_iov; - ptl_handle_md_t tx_bulkmdh; - ptl_handle_md_t tx_reqmdh; - struct timeval tx_bulk_posted; - struct timeval tx_bulk_done; - struct timeval tx_req_posted; - struct timeval tx_req_done; - int tx_completing; /* someone already completing */ - int tx_msgsize; /* # bytes in tx_msg */ - time_t tx_deadline; /* time to complete by */ - kptl_msg_t tx_msg; /* message to send */ -} ptllnd_tx_t; - -#define PTLLND_RDMA_WRITE 0x100 /* pseudo message type */ -#define PTLLND_RDMA_READ 0x101 /* (no msg actually sent) */ - -/* Hack to extract object type from event's user_ptr relies on (and checks) - * that structs are somewhat aligned. 
*/ -#define PTLLND_EVENTARG_TYPE_TX 0x1 -#define PTLLND_EVENTARG_TYPE_BUF 0x2 -#define PTLLND_EVENTARG_TYPE_MASK 0x3 - -static inline void * -ptllnd_obj2eventarg (void *obj, int type) -{ - unsigned long ptr = (unsigned long)obj; - - LASSERT ((ptr & PTLLND_EVENTARG_TYPE_MASK) == 0); - LASSERT ((type & ~PTLLND_EVENTARG_TYPE_MASK) == 0); - - return (void *)(ptr | type); -} - -static inline int -ptllnd_eventarg2type (void *arg) -{ - unsigned long ptr = (unsigned long)arg; - - return (ptr & PTLLND_EVENTARG_TYPE_MASK); -} - -static inline void * -ptllnd_eventarg2obj (void *arg) -{ - unsigned long ptr = (unsigned long)arg; - - return (void *)(ptr & ~PTLLND_EVENTARG_TYPE_MASK); -} - -int ptllnd_parse_int_tunable(int *value, char *name, int dflt); -void ptllnd_cull_tx_history(ptllnd_ni_t *plni); -int ptllnd_startup(lnet_ni_t *ni); -void ptllnd_shutdown(lnet_ni_t *ni); -int ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg); -int ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - void **new_privatep); - -ptllnd_tx_t *ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob); -void ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive); -int ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int n); -void ptllnd_wait(lnet_ni_t *ni, int milliseconds); -void ptllnd_check_sends(ptllnd_peer_t *peer); -void ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id); -void ptllnd_destroy_peer(ptllnd_peer_t *peer); -void ptllnd_close_peer(ptllnd_peer_t *peer, int error); -int ptllnd_post_buffer(ptllnd_buffer_t *buf); -int ptllnd_size_buffers (lnet_ni_t *ni, int delta); -const char *ptllnd_evtype2str(int type); -const char *ptllnd_msgtype2str(int type); -const char *ptllnd_errtype2str(int type); -char 
*ptllnd_ptlid2str(ptl_process_id_t id); -void ptllnd_dump_debug(lnet_ni_t *ni, lnet_process_id_t id); - - -static inline void -ptllnd_peer_addref (ptllnd_peer_t *peer) -{ - LASSERT (peer->plp_refcount > 0); - peer->plp_refcount++; -} - -static inline void -ptllnd_peer_decref (ptllnd_peer_t *peer) -{ - LASSERT (peer->plp_refcount > 0); - peer->plp_refcount--; - if (peer->plp_refcount == 0) - ptllnd_destroy_peer(peer); -} - -static inline lnet_nid_t -ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid) -{ - return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid); -} - -static inline ptl_nid_t -ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) -{ - return LNET_NIDADDR(lnet_nid); -} - -/* - * A note about lprintf(): - * Normally printf() is redirected to stdout of the console - * from which yod launched the catamount application. However - * there is a lot of initilziation code that runs before this - * redirection is hooked up, and printf() seems to go to the bit bucket - * - * To get any kind of debug output and init time lprintf() can - * be used to output to the console from which bookqk was used to - * boot the catamount node. This works for debugging some simple - * cases. - */ - - diff --git a/lnet/ulnds/ptllnd/ptllnd_cb.c b/lnet/ulnds/ptllnd/ptllnd_cb.c deleted file mode 100644 index 0c6f521c23f5d90f5351084cd6630f900bc302b8..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/ptllnd_cb.c +++ /dev/null @@ -1,1897 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: Eric Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. 
- * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -#include "ptllnd.h" - -void -ptllnd_set_tx_deadline(ptllnd_tx_t *tx) -{ - ptllnd_peer_t *peer = tx->tx_peer; - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - - tx->tx_deadline = cfs_time_current_sec() + plni->plni_timeout; -} - -void -ptllnd_post_tx(ptllnd_tx_t *tx) -{ - ptllnd_peer_t *peer = tx->tx_peer; - - ptllnd_set_tx_deadline(tx); - list_add_tail(&tx->tx_list, &peer->plp_txq); - ptllnd_check_sends(peer); -} - -char * -ptllnd_ptlid2str(ptl_process_id_t id) -{ - static char strs[8][32]; - static int idx = 0; - - char *str = strs[idx++]; - - if (idx >= sizeof(strs)/sizeof(strs[0])) - idx = 0; - - snprintf(str, sizeof(strs[0]), FMT_PTLID, id.pid, id.nid); - return str; -} - -void -ptllnd_destroy_peer(ptllnd_peer_t *peer) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - int nmsg = peer->plp_lazy_credits + - plni->plni_peer_credits; - - ptllnd_size_buffers(ni, -nmsg); - - LASSERT (peer->plp_closing); - LASSERT (plni->plni_npeers > 0); - LASSERT (list_empty(&peer->plp_txq)); - LASSERT (list_empty(&peer->plp_activeq)); - plni->plni_npeers--; - LIBCFS_FREE(peer, sizeof(*peer)); -} - -void -ptllnd_abort_txs(ptllnd_ni_t *plni, struct list_head *q) -{ - while (!list_empty(q)) { - ptllnd_tx_t *tx = list_entry(q->next, ptllnd_tx_t, tx_list); - - tx->tx_status = -ESHUTDOWN; - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &plni->plni_zombie_txs); - } -} - -void -ptllnd_close_peer(ptllnd_peer_t *peer, int error) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - - if (peer->plp_closing) - return; - - peer->plp_closing = 1; - - if (!list_empty(&peer->plp_txq) || - !list_empty(&peer->plp_activeq) || - error != 0) { - 
CWARN("Closing %s\n", libcfs_id2str(peer->plp_id)); - if (plni->plni_debug) - ptllnd_dump_debug(ni, peer->plp_id); - } - - ptllnd_abort_txs(plni, &peer->plp_txq); - ptllnd_abort_txs(plni, &peer->plp_activeq); - - list_del(&peer->plp_list); - ptllnd_peer_decref(peer); -} - -ptllnd_peer_t * -ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create) -{ - ptllnd_ni_t *plni = ni->ni_data; - unsigned int hash = LNET_NIDADDR(id.nid) % plni->plni_peer_hash_size; - struct list_head *tmp; - ptllnd_peer_t *plp; - ptllnd_tx_t *tx; - int rc; - - LASSERT (LNET_NIDNET(id.nid) == LNET_NIDNET(ni->ni_nid)); - - list_for_each(tmp, &plni->plni_peer_hash[hash]) { - plp = list_entry(tmp, ptllnd_peer_t, plp_list); - - if (plp->plp_id.nid == id.nid && - plp->plp_id.pid == id.pid) { - ptllnd_peer_addref(plp); - return plp; - } - } - - if (!create) - return NULL; - - /* New peer: check first for enough posted buffers */ - plni->plni_npeers++; - rc = ptllnd_size_buffers(ni, plni->plni_peer_credits); - if (rc != 0) { - plni->plni_npeers--; - return NULL; - } - - LIBCFS_ALLOC(plp, sizeof(*plp)); - if (plp == NULL) { - CERROR("Can't allocate new peer %s\n", libcfs_id2str(id)); - plni->plni_npeers--; - ptllnd_size_buffers(ni, -plni->plni_peer_credits); - return NULL; - } - - plp->plp_ni = ni; - plp->plp_id = id; - plp->plp_ptlid.nid = LNET_NIDADDR(id.nid); - plp->plp_ptlid.pid = plni->plni_ptllnd_pid; - plp->plp_credits = 1; /* add more later when she gives me credits */ - plp->plp_max_msg_size = plni->plni_max_msg_size; /* until I hear from her */ - plp->plp_sent_credits = 1; /* Implicit credit for HELLO */ - plp->plp_outstanding_credits = plni->plni_peer_credits - 1; - plp->plp_lazy_credits = 0; - plp->plp_extra_lazy_credits = 0; - plp->plp_match = 0; - plp->plp_stamp = 0; - plp->plp_recvd_hello = 0; - plp->plp_closing = 0; - plp->plp_refcount = 1; - CFS_INIT_LIST_HEAD(&plp->plp_list); - CFS_INIT_LIST_HEAD(&plp->plp_txq); - CFS_INIT_LIST_HEAD(&plp->plp_activeq); - - 
ptllnd_peer_addref(plp); - list_add_tail(&plp->plp_list, &plni->plni_peer_hash[hash]); - - tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_HELLO, 0); - if (tx == NULL) { - CERROR("Can't send HELLO to %s\n", libcfs_id2str(id)); - ptllnd_close_peer(plp, -ENOMEM); - ptllnd_peer_decref(plp); - return NULL; - } - - tx->tx_msg.ptlm_u.hello.kptlhm_matchbits = PTL_RESERVED_MATCHBITS; - tx->tx_msg.ptlm_u.hello.kptlhm_max_msg_size = plni->plni_max_msg_size; - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post hello %p", libcfs_id2str(id), - tx->tx_peer->plp_credits, - tx->tx_peer->plp_outstanding_credits, - tx->tx_peer->plp_sent_credits, - plni->plni_peer_credits + - tx->tx_peer->plp_lazy_credits, tx); - ptllnd_post_tx(tx); - - return plp; -} - -int -ptllnd_count_q(struct list_head *q) -{ - struct list_head *e; - int n = 0; - - list_for_each(e, q) { - n++; - } - - return n; -} - -const char * -ptllnd_tx_typestr(int type) -{ - switch (type) { - case PTLLND_RDMA_WRITE: - return "rdma_write"; - - case PTLLND_RDMA_READ: - return "rdma_read"; - - case PTLLND_MSG_TYPE_PUT: - return "put_req"; - - case PTLLND_MSG_TYPE_GET: - return "get_req"; - - case PTLLND_MSG_TYPE_IMMEDIATE: - return "immediate"; - - case PTLLND_MSG_TYPE_NOOP: - return "noop"; - - case PTLLND_MSG_TYPE_HELLO: - return "hello"; - - default: - return "<unknown>"; - } -} - -void -ptllnd_debug_tx(ptllnd_tx_t *tx) -{ - CDEBUG(D_WARNING, "%s %s b %ld.%06ld/%ld.%06ld" - " r %ld.%06ld/%ld.%06ld status %d\n", - ptllnd_tx_typestr(tx->tx_type), - libcfs_id2str(tx->tx_peer->plp_id), - tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec, - tx->tx_bulk_done.tv_sec, tx->tx_bulk_done.tv_usec, - tx->tx_req_posted.tv_sec, tx->tx_req_posted.tv_usec, - tx->tx_req_done.tv_sec, tx->tx_req_done.tv_usec, - tx->tx_status); -} - -void -ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id) -{ - ptllnd_peer_t *plp = ptllnd_find_peer(ni, id, 0); - struct list_head *tmp; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - - if (plp == NULL) { - 
CDEBUG(D_WARNING, "No peer %s\n", libcfs_id2str(id)); - return; - } - - CDEBUG(D_WARNING, "%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d c %d/%d+%d(%d)\n", - libcfs_id2str(id), - plp->plp_recvd_hello ? "H" : "_", - plp->plp_closing ? "C" : "_", - plp->plp_refcount, - plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000), - plp->plp_match, - ptllnd_count_q(&plp->plp_txq), - ptllnd_count_q(&plp->plp_activeq), - plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits); - - CDEBUG(D_WARNING, "txq:\n"); - list_for_each (tmp, &plp->plp_txq) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - ptllnd_debug_tx(tx); - } - - CDEBUG(D_WARNING, "activeq:\n"); - list_for_each (tmp, &plp->plp_activeq) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - ptllnd_debug_tx(tx); - } - - CDEBUG(D_WARNING, "zombies:\n"); - list_for_each (tmp, &plni->plni_zombie_txs) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - if (tx->tx_peer->plp_id.nid == id.nid && - tx->tx_peer->plp_id.pid == id.pid) - ptllnd_debug_tx(tx); - } - - CDEBUG(D_WARNING, "history:\n"); - list_for_each (tmp, &plni->plni_tx_history) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - if (tx->tx_peer->plp_id.nid == id.nid && - tx->tx_peer->plp_id.pid == id.pid) - ptllnd_debug_tx(tx); - } - - ptllnd_peer_decref(plp); -} - -void -ptllnd_dump_debug(lnet_ni_t *ni, lnet_process_id_t id) -{ - ptllnd_debug_peer(ni, id); - ptllnd_dump_history(); -} - -void -ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive) -{ - lnet_process_id_t id; - ptllnd_peer_t *peer; - time_t start = cfs_time_current_sec(); - ptllnd_ni_t *plni = ni->ni_data; - int w = plni->plni_long_wait; - - /* This is only actually used to connect to routers at startup! 
*/ - LASSERT(alive); - - id.nid = nid; - id.pid = LUSTRE_SRV_LNET_PID; - - peer = ptllnd_find_peer(ni, id, 1); - if (peer == NULL) - return; - - /* wait for the peer to reply */ - while (!peer->plp_recvd_hello) { - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds to connect to %s\n", - (int)(cfs_time_current_sec() - start), - libcfs_id2str(id)); - w *= 2; - } - - ptllnd_wait(ni, w); - } - - ptllnd_peer_decref(peer); -} - -int -ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync) -{ - ptllnd_peer_t *peer = ptllnd_find_peer(ni, id, nasync > 0); - int rc; - - if (peer == NULL) - return -ENOMEM; - - LASSERT (peer->plp_lazy_credits >= 0); - LASSERT (peer->plp_extra_lazy_credits >= 0); - - /* If nasync < 0, we're being told we can reduce the total message - * headroom. We can't do this right now because our peer might already - * have credits for the extra buffers, so we just account the extra - * headroom in case we need it later and only destroy buffers when the - * peer closes. - * - * Note that the following condition handles this case, where it - * actually increases the extra lazy credit counter. */ - - if (nasync <= peer->plp_extra_lazy_credits) { - peer->plp_extra_lazy_credits -= nasync; - return 0; - } - - LASSERT (nasync > 0); - - nasync -= peer->plp_extra_lazy_credits; - peer->plp_extra_lazy_credits = 0; - - rc = ptllnd_size_buffers(ni, nasync); - if (rc == 0) { - peer->plp_lazy_credits += nasync; - peer->plp_outstanding_credits += nasync; - } - - return rc; -} - -__u32 -ptllnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -ptllnd_tx_t * -ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - int msgsize; - - CDEBUG(D_NET, "peer=%p type=%d payload=%d\n", peer, type, payload_nob); - - switch (type) { - default: - LBUG(); - - case PTLLND_RDMA_WRITE: - case PTLLND_RDMA_READ: - LASSERT (payload_nob == 0); - msgsize = 0; - break; - - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - LASSERT (payload_nob == 0); - msgsize = offsetof(kptl_msg_t, ptlm_u) + - sizeof(kptl_rdma_msg_t); - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - msgsize = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload[payload_nob]); - break; - - case PTLLND_MSG_TYPE_NOOP: - LASSERT (payload_nob == 0); - msgsize = offsetof(kptl_msg_t, ptlm_u); - break; - - case PTLLND_MSG_TYPE_HELLO: - LASSERT (payload_nob == 0); - msgsize = offsetof(kptl_msg_t, ptlm_u) + - sizeof(kptl_hello_msg_t); - break; - } - - msgsize = (msgsize + 7) & ~7; - LASSERT (msgsize <= peer->plp_max_msg_size); - - LIBCFS_ALLOC(tx, offsetof(ptllnd_tx_t, tx_msg) + msgsize); - - if (tx == NULL) { - CERROR("Can't allocate msg type %d for %s\n", - type, libcfs_id2str(peer->plp_id)); - return NULL; - } - - CFS_INIT_LIST_HEAD(&tx->tx_list); - tx->tx_peer = peer; - tx->tx_type = type; - tx->tx_lnetmsg = tx->tx_lnetreplymsg = NULL; - tx->tx_niov = 0; - tx->tx_iov = NULL; - tx->tx_reqmdh = PTL_INVALID_HANDLE; - tx->tx_bulkmdh = PTL_INVALID_HANDLE; - tx->tx_msgsize = msgsize; - tx->tx_completing = 0; - tx->tx_status = 0; - - memset(&tx->tx_bulk_posted, 0, sizeof(tx->tx_bulk_posted)); - memset(&tx->tx_bulk_done, 0, sizeof(tx->tx_bulk_done)); - memset(&tx->tx_req_posted, 0, sizeof(tx->tx_req_posted)); - memset(&tx->tx_req_done, 0, sizeof(tx->tx_req_done)); - - if (msgsize != 0) { - tx->tx_msg.ptlm_magic = PTLLND_MSG_MAGIC; - tx->tx_msg.ptlm_version = PTLLND_MSG_VERSION; - tx->tx_msg.ptlm_type = type; - tx->tx_msg.ptlm_credits = 0; - 
tx->tx_msg.ptlm_nob = msgsize; - tx->tx_msg.ptlm_cksum = 0; - tx->tx_msg.ptlm_srcnid = ni->ni_nid; - tx->tx_msg.ptlm_srcstamp = plni->plni_stamp; - tx->tx_msg.ptlm_dstnid = peer->plp_id.nid; - tx->tx_msg.ptlm_dststamp = peer->plp_stamp; - tx->tx_msg.ptlm_srcpid = the_lnet.ln_pid; - tx->tx_msg.ptlm_dstpid = peer->plp_id.pid; - } - - ptllnd_peer_addref(peer); - plni->plni_ntxs++; - - CDEBUG(D_NET, "tx=%p\n",tx); - - return tx; -} - -void -ptllnd_abort_tx(ptllnd_tx_t *tx, ptl_handle_md_t *mdh) -{ - ptllnd_peer_t *peer = tx->tx_peer; - lnet_ni_t *ni = peer->plp_ni; - int rc; - time_t start = cfs_time_current_sec(); - ptllnd_ni_t *plni = ni->ni_data; - int w = plni->plni_long_wait; - - while (!PtlHandleIsEqual(*mdh, PTL_INVALID_HANDLE)) { - rc = PtlMDUnlink(*mdh); -#ifndef LUSTRE_PORTALS_UNLINK_SEMANTICS - if (rc == PTL_OK) /* unlink successful => no unlinked event */ - return; - LASSERT (rc == PTL_MD_IN_USE); -#endif - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds to abort tx to %s\n", - (int)(cfs_time_current_sec() - start), - libcfs_id2str(peer->plp_id)); - w *= 2; - } - /* Wait for ptllnd_tx_event() to invalidate */ - ptllnd_wait(ni, w); - } -} - -void -ptllnd_cull_tx_history(ptllnd_ni_t *plni) -{ - int max = plni->plni_max_tx_history; - - while (plni->plni_ntx_history > max) { - ptllnd_tx_t *tx = list_entry(plni->plni_tx_history.next, - ptllnd_tx_t, tx_list); - list_del(&tx->tx_list); - - ptllnd_peer_decref(tx->tx_peer); - - LIBCFS_FREE(tx, offsetof(ptllnd_tx_t, tx_msg) + tx->tx_msgsize); - - LASSERT (plni->plni_ntxs > 0); - plni->plni_ntxs--; - plni->plni_ntx_history--; - } -} - -void -ptllnd_tx_done(ptllnd_tx_t *tx) -{ - ptllnd_peer_t *peer = tx->tx_peer; - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - - /* CAVEAT EMPTOR: If this tx is being aborted, I'll continue to get - * events for this tx until it's unlinked. 
So I set tx_completing to - * flag the tx is getting handled */ - - if (tx->tx_completing) - return; - - tx->tx_completing = 1; - - if (!list_empty(&tx->tx_list)) - list_del_init(&tx->tx_list); - - if (tx->tx_status != 0) { - if (plni->plni_debug) { - CERROR("Completing tx for %s with error %d\n", - libcfs_id2str(peer->plp_id), tx->tx_status); - ptllnd_debug_tx(tx); - } - ptllnd_close_peer(peer, tx->tx_status); - } - - ptllnd_abort_tx(tx, &tx->tx_reqmdh); - ptllnd_abort_tx(tx, &tx->tx_bulkmdh); - - if (tx->tx_niov > 0) { - LIBCFS_FREE(tx->tx_iov, tx->tx_niov * sizeof(*tx->tx_iov)); - tx->tx_niov = 0; - } - - if (tx->tx_lnetreplymsg != NULL) { - LASSERT (tx->tx_type == PTLLND_MSG_TYPE_GET); - LASSERT (tx->tx_lnetmsg != NULL); - /* Simulate GET success always */ - lnet_finalize(ni, tx->tx_lnetmsg, 0); - CDEBUG(D_NET, "lnet_finalize(tx_lnetreplymsg=%p)\n",tx->tx_lnetreplymsg); - lnet_finalize(ni, tx->tx_lnetreplymsg, tx->tx_status); - } else if (tx->tx_lnetmsg != NULL) { - lnet_finalize(ni, tx->tx_lnetmsg, tx->tx_status); - } - - plni->plni_ntx_history++; - list_add_tail(&tx->tx_list, &plni->plni_tx_history); - - ptllnd_cull_tx_history(plni); -} - -int -ptllnd_set_txiov(ptllnd_tx_t *tx, - unsigned int niov, struct iovec *iov, - unsigned int offset, unsigned int len) -{ - ptl_md_iovec_t *piov; - int npiov; - - if (len == 0) { - tx->tx_niov = 0; - return 0; - } - - /* - * Remove iovec's at the beginning that - * are skipped because of the offset. 
- * Adjust the offset accordingly - */ - for (;;) { - LASSERT (niov > 0); - if (offset < iov->iov_len) - break; - offset -= iov->iov_len; - niov--; - iov++; - } - - for (;;) { - int temp_offset = offset; - int resid = len; - LIBCFS_ALLOC(piov, niov * sizeof(*piov)); - if (piov == NULL) - return -ENOMEM; - - for (npiov = 0;; npiov++) { - LASSERT (npiov < niov); - LASSERT (iov->iov_len >= temp_offset); - - piov[npiov].iov_base = iov[npiov].iov_base + temp_offset; - piov[npiov].iov_len = iov[npiov].iov_len - temp_offset; - - if (piov[npiov].iov_len >= resid) { - piov[npiov].iov_len = resid; - npiov++; - break; - } - resid -= piov[npiov].iov_len; - temp_offset = 0; - } - - if (npiov == niov) { - tx->tx_niov = niov; - tx->tx_iov = piov; - return 0; - } - - /* Dang! The piov I allocated was too big and it's a drag to - * have to maintain separate 'allocated' and 'used' sizes, so - * I'll just do it again; NB this doesn't happen normally... */ - LIBCFS_FREE(piov, niov * sizeof(*piov)); - niov = npiov; - } -} - -void -ptllnd_set_md_buffer(ptl_md_t *md, ptllnd_tx_t *tx) -{ - unsigned int niov = tx->tx_niov; - ptl_md_iovec_t *iov = tx->tx_iov; - - LASSERT ((md->options & PTL_MD_IOVEC) == 0); - - if (niov == 0) { - md->start = NULL; - md->length = 0; - } else if (niov == 1) { - md->start = iov[0].iov_base; - md->length = iov[0].iov_len; - } else { - md->start = iov; - md->length = niov; - md->options |= PTL_MD_IOVEC; - } -} - -int -ptllnd_post_buffer(ptllnd_buffer_t *buf) -{ - lnet_ni_t *ni = buf->plb_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptl_process_id_t anyid = { - .nid = PTL_NID_ANY, - .pid = PTL_PID_ANY}; - ptl_md_t md = { - .start = buf->plb_buffer, - .length = plni->plni_buffer_size, - .threshold = PTL_MD_THRESH_INF, - .max_size = plni->plni_max_msg_size, - .options = (PTLLND_MD_OPTIONS | - PTL_MD_OP_PUT | PTL_MD_MAX_SIZE | - PTL_MD_LOCAL_ALIGN8), - .user_ptr = ptllnd_obj2eventarg(buf, PTLLND_EVENTARG_TYPE_BUF), - .eq_handle = plni->plni_eqh}; - ptl_handle_me_t meh; 
- int rc; - - LASSERT (!buf->plb_posted); - - rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, - anyid, LNET_MSG_MATCHBITS, 0, - PTL_UNLINK, PTL_INS_AFTER, &meh); - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %s(%d)\n", - ptllnd_errtype2str(rc), rc); - return -ENOMEM; - } - - buf->plb_posted = 1; - plni->plni_nposted_buffers++; - - rc = PtlMDAttach(meh, md, LNET_UNLINK, &buf->plb_md); - if (rc == PTL_OK) - return 0; - - CERROR("PtlMDAttach failed: %s(%d)\n", - ptllnd_errtype2str(rc), rc); - - buf->plb_posted = 0; - plni->plni_nposted_buffers--; - - rc = PtlMEUnlink(meh); - LASSERT (rc == PTL_OK); - - return -ENOMEM; -} - -void -ptllnd_check_sends(ptllnd_peer_t *peer) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - ptl_md_t md; - ptl_handle_md_t mdh; - int rc; - - CDEBUG(D_NET, "%s: [%d/%d+%d(%d)\n", - libcfs_id2str(peer->plp_id), peer->plp_credits, - peer->plp_outstanding_credits, peer->plp_sent_credits, - plni->plni_peer_credits + peer->plp_lazy_credits); - - if (list_empty(&peer->plp_txq) && - peer->plp_outstanding_credits >= PTLLND_CREDIT_HIGHWATER(plni) && - peer->plp_credits != 0) { - - tx = ptllnd_new_tx(peer, PTLLND_MSG_TYPE_NOOP, 0); - CDEBUG(D_NET, "NOOP tx=%p\n",tx); - if (tx == NULL) { - CERROR("Can't return credits to %s\n", - libcfs_id2str(peer->plp_id)); - } else { - ptllnd_set_tx_deadline(tx); - list_add_tail(&tx->tx_list, &peer->plp_txq); - } - } - - while (!list_empty(&peer->plp_txq)) { - tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list); - - LASSERT (tx->tx_msgsize > 0); - - LASSERT (peer->plp_outstanding_credits >= 0); - LASSERT (peer->plp_sent_credits >= 0); - LASSERT (peer->plp_outstanding_credits + peer->plp_sent_credits - <= plni->plni_peer_credits + peer->plp_lazy_credits); - LASSERT (peer->plp_credits >= 0); - - if (peer->plp_credits == 0) { /* no credits */ - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: no creds for %p", - libcfs_id2str(peer->plp_id), - peer->plp_credits, - 
peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + - peer->plp_lazy_credits, tx); - break; - } - - if (peer->plp_credits == 1 && /* last credit reserved for */ - peer->plp_outstanding_credits == 0) { /* returning credits */ - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: too few creds for %p", - libcfs_id2str(peer->plp_id), - peer->plp_credits, - peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + - peer->plp_lazy_credits, tx); - break; - } - - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &peer->plp_activeq); - - CDEBUG(D_NET, "Sending at TX=%p type=%s (%d)\n",tx, - ptllnd_msgtype2str(tx->tx_type),tx->tx_type); - - if (tx->tx_type == PTLLND_MSG_TYPE_NOOP && - (!list_empty(&peer->plp_txq) || - peer->plp_outstanding_credits < - PTLLND_CREDIT_HIGHWATER(plni))) { - /* redundant NOOP */ - ptllnd_tx_done(tx); - continue; - } - - /* Set stamp at the last minute; on a new peer, I don't know it - * until I receive the HELLO back */ - tx->tx_msg.ptlm_dststamp = peer->plp_stamp; - - /* - * Return all the credits we have - */ - tx->tx_msg.ptlm_credits = peer->plp_outstanding_credits; - peer->plp_sent_credits += peer->plp_outstanding_credits; - peer->plp_outstanding_credits = 0; - - /* - * One less credit - */ - peer->plp_credits--; - - if (plni->plni_checksum) - tx->tx_msg.ptlm_cksum = - ptllnd_cksum(&tx->tx_msg, - offsetof(kptl_msg_t, ptlm_u)); - - md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX); - md.eq_handle = plni->plni_eqh; - md.threshold = 1; - md.options = PTLLND_MD_OPTIONS; - md.start = &tx->tx_msg; - md.length = tx->tx_msgsize; - - rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh); - if (rc != PTL_OK) { - CERROR("PtlMDBind for %s failed: %s(%d)\n", - libcfs_id2str(peer->plp_id), - ptllnd_errtype2str(rc), rc); - tx->tx_status = -EIO; - ptllnd_tx_done(tx); - break; - } - - LASSERT (tx->tx_type != PTLLND_RDMA_WRITE && - tx->tx_type != PTLLND_RDMA_READ); - - tx->tx_reqmdh = mdh; - 
gettimeofday(&tx->tx_req_posted, NULL); - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: %s %p c %d", - libcfs_id2str(peer->plp_id), - peer->plp_credits, - peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + - peer->plp_lazy_credits, - ptllnd_msgtype2str(tx->tx_type), tx, - tx->tx_msg.ptlm_credits); - - rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid, - plni->plni_portal, 0, LNET_MSG_MATCHBITS, 0, 0); - if (rc != PTL_OK) { - CERROR("PtlPut for %s failed: %s(%d)\n", - libcfs_id2str(peer->plp_id), - ptllnd_errtype2str(rc), rc); - tx->tx_status = -EIO; - ptllnd_tx_done(tx); - break; - } - } -} - -int -ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg, - unsigned int niov, struct iovec *iov, - unsigned int offset, unsigned int len) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx = ptllnd_new_tx(peer, type, 0); - __u64 matchbits; - ptl_md_t md; - ptl_handle_md_t mdh; - ptl_handle_me_t meh; - int rc; - int rc2; - time_t start; - int w; - - CDEBUG(D_NET, "niov=%d offset=%d len=%d\n",niov,offset,len); - - LASSERT (type == PTLLND_MSG_TYPE_GET || - type == PTLLND_MSG_TYPE_PUT); - - if (tx == NULL) { - CERROR("Can't allocate %s tx for %s\n", - type == PTLLND_MSG_TYPE_GET ? 
"GET" : "PUT/REPLY", - libcfs_id2str(peer->plp_id)); - return -ENOMEM; - } - - rc = ptllnd_set_txiov(tx, niov, iov, offset, len); - if (rc != 0) { - CERROR ("Can't allocate iov %d for %s\n", - niov, libcfs_id2str(peer->plp_id)); - rc = -ENOMEM; - goto failed; - } - - md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX); - md.eq_handle = plni->plni_eqh; - md.threshold = 1; - md.max_size = 0; - md.options = PTLLND_MD_OPTIONS; - if(type == PTLLND_MSG_TYPE_GET) - md.options |= PTL_MD_OP_PUT | PTL_MD_ACK_DISABLE; - else - md.options |= PTL_MD_OP_GET; - ptllnd_set_md_buffer(&md, tx); - - start = cfs_time_current_sec(); - w = plni->plni_long_wait; - - while (!peer->plp_recvd_hello) { /* wait to validate plp_match */ - if (peer->plp_closing) { - rc = -EIO; - goto failed; - } - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds to connect to %s\n", - (int)(cfs_time_current_sec() - start), - libcfs_id2str(peer->plp_id)); - w *= 2; - } - ptllnd_wait(ni, w); - } - - if (peer->plp_match < PTL_RESERVED_MATCHBITS) - peer->plp_match = PTL_RESERVED_MATCHBITS; - matchbits = peer->plp_match++; - - rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, peer->plp_ptlid, - matchbits, 0, PTL_UNLINK, PTL_INS_BEFORE, &meh); - if (rc != PTL_OK) { - CERROR("PtlMEAttach for %s failed: %s(%d)\n", - libcfs_id2str(peer->plp_id), - ptllnd_errtype2str(rc), rc); - rc = -EIO; - goto failed; - } - - gettimeofday(&tx->tx_bulk_posted, NULL); - - rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh); - if (rc != PTL_OK) { - CERROR("PtlMDAttach for %s failed: %s(%d)\n", - libcfs_id2str(peer->plp_id), - ptllnd_errtype2str(rc), rc); - rc2 = PtlMEUnlink(meh); - LASSERT (rc2 == PTL_OK); - rc = -EIO; - goto failed; - } - tx->tx_bulkmdh = mdh; - - /* - * We need to set the stamp here because it - * we could have received a HELLO above that set - * peer->plp_stamp - */ - tx->tx_msg.ptlm_dststamp = peer->plp_stamp; - - tx->tx_msg.ptlm_u.rdma.kptlrm_hdr = msg->msg_hdr; - 
tx->tx_msg.ptlm_u.rdma.kptlrm_matchbits = matchbits; - - if (type == PTLLND_MSG_TYPE_GET) { - tx->tx_lnetreplymsg = lnet_create_reply_msg(ni, msg); - if (tx->tx_lnetreplymsg == NULL) { - CERROR("Can't create reply for GET to %s\n", - libcfs_id2str(msg->msg_target)); - rc = -ENOMEM; - goto failed; - } - } - - tx->tx_lnetmsg = msg; - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post passive %s p %d %p", - libcfs_id2str(msg->msg_target), - peer->plp_credits, peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + peer->plp_lazy_credits, - lnet_msgtyp2str(msg->msg_type), - (le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ? - le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) : - (le32_to_cpu(msg->msg_type) == LNET_MSG_GET) ? - le32_to_cpu(msg->msg_hdr.msg.get.ptl_index) : -1, - tx); - ptllnd_post_tx(tx); - return 0; - - failed: - ptllnd_tx_done(tx); - return rc; -} - -int -ptllnd_active_rdma(ptllnd_peer_t *peer, int type, - lnet_msg_t *msg, __u64 matchbits, - unsigned int niov, struct iovec *iov, - unsigned int offset, unsigned int len) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx = ptllnd_new_tx(peer, type, 0); - ptl_md_t md; - ptl_handle_md_t mdh; - int rc; - - LASSERT (type == PTLLND_RDMA_READ || - type == PTLLND_RDMA_WRITE); - - if (tx == NULL) { - CERROR("Can't allocate tx for RDMA %s with %s\n", - (type == PTLLND_RDMA_WRITE) ? "write" : "read", - libcfs_id2str(peer->plp_id)); - ptllnd_close_peer(peer, -ENOMEM); - return -ENOMEM; - } - - rc = ptllnd_set_txiov(tx, niov, iov, offset, len); - if (rc != 0) { - CERROR ("Can't allocate iov %d for %s\n", - niov, libcfs_id2str(peer->plp_id)); - rc = -ENOMEM; - goto failed; - } - - md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX); - md.eq_handle = plni->plni_eqh; - md.max_size = 0; - md.options = PTLLND_MD_OPTIONS; - md.threshold = (type == PTLLND_RDMA_READ) ? 
2 : 1; - - ptllnd_set_md_buffer(&md, tx); - - rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh); - if (rc != PTL_OK) { - CERROR("PtlMDBind for %s failed: %s(%d)\n", - libcfs_id2str(peer->plp_id), - ptllnd_errtype2str(rc), rc); - rc = -EIO; - goto failed; - } - - tx->tx_bulkmdh = mdh; - tx->tx_lnetmsg = msg; - - ptllnd_set_tx_deadline(tx); - list_add_tail(&tx->tx_list, &peer->plp_activeq); - gettimeofday(&tx->tx_bulk_posted, NULL); - - if (type == PTLLND_RDMA_READ) - rc = PtlGet(mdh, peer->plp_ptlid, - plni->plni_portal, 0, matchbits, 0); - else - rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid, - plni->plni_portal, 0, matchbits, 0, - (msg == NULL) ? PTLLND_RDMA_FAIL : PTLLND_RDMA_OK); - - if (rc == PTL_OK) - return 0; - - CERROR("Can't initiate RDMA with %s: %s(%d)\n", - libcfs_id2str(peer->plp_id), - ptllnd_errtype2str(rc), rc); - - tx->tx_lnetmsg = NULL; - failed: - tx->tx_status = rc; - ptllnd_tx_done(tx); /* this will close peer */ - return rc; -} - -int -ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_peer_t *plp; - ptllnd_tx_t *tx; - int nob; - int rc; - - LASSERT (!msg->msg_routing); - LASSERT (msg->msg_kiov == NULL); - - LASSERT (msg->msg_niov <= PTL_MD_MAX_IOV); /* !!! */ - - CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n", - lnet_msgtyp2str(msg->msg_type), - msg->msg_niov, msg->msg_offset, msg->msg_len, - libcfs_nid2str(msg->msg_target.nid), - msg->msg_target_is_router ? 
"(rtr)" : ""); - - if ((msg->msg_target.pid & LNET_PID_USERFLAG) != 0) { - CERROR("Can't send to non-kernel peer %s\n", - libcfs_id2str(msg->msg_target)); - return -EHOSTUNREACH; - } - - plp = ptllnd_find_peer(ni, msg->msg_target, 1); - if (plp == NULL) - return -ENOMEM; - - switch (msg->msg_type) { - default: - LBUG(); - - case LNET_MSG_ACK: - LASSERT (msg->msg_len == 0); - break; /* send IMMEDIATE */ - - case LNET_MSG_GET: - if (msg->msg_target_is_router) - break; /* send IMMEDIATE */ - - nob = msg->msg_md->md_length; - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]); - if (nob <= plni->plni_max_msg_size) - break; - - LASSERT ((msg->msg_md->md_options & LNET_MD_KIOV) == 0); - rc = ptllnd_passive_rdma(plp, PTLLND_MSG_TYPE_GET, msg, - msg->msg_md->md_niov, - msg->msg_md->md_iov.iov, - 0, msg->msg_md->md_length); - ptllnd_peer_decref(plp); - return rc; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - nob = msg->msg_len; - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]); - if (nob <= plp->plp_max_msg_size) - break; /* send IMMEDIATE */ - - rc = ptllnd_passive_rdma(plp, PTLLND_MSG_TYPE_PUT, msg, - msg->msg_niov, msg->msg_iov, - msg->msg_offset, msg->msg_len); - ptllnd_peer_decref(plp); - return rc; - } - - /* send IMMEDIATE - * NB copy the payload so we don't have to do a fragmented send */ - - tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_IMMEDIATE, msg->msg_len); - if (tx == NULL) { - CERROR("Can't allocate tx for lnet type %d to %s\n", - msg->msg_type, libcfs_id2str(msg->msg_target)); - ptllnd_peer_decref(plp); - return -ENOMEM; - } - - lnet_copy_iov2flat(tx->tx_msgsize, &tx->tx_msg, - offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload), - msg->msg_niov, msg->msg_iov, msg->msg_offset, - msg->msg_len); - tx->tx_msg.ptlm_u.immediate.kptlim_hdr = msg->msg_hdr; - - tx->tx_lnetmsg = msg; - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post immediate %s p %d %p", - libcfs_id2str(msg->msg_target), - plp->plp_credits, plp->plp_outstanding_credits, - 
plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits, - lnet_msgtyp2str(msg->msg_type), - (le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ? - le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) : - (le32_to_cpu(msg->msg_type) == LNET_MSG_GET) ? - le32_to_cpu(msg->msg_hdr.msg.get.ptl_index) : -1, - tx); - ptllnd_post_tx(tx); - ptllnd_peer_decref(plp); - return 0; -} - -void -ptllnd_rx_done(ptllnd_rx_t *rx) -{ - ptllnd_peer_t *plp = rx->rx_peer; - lnet_ni_t *ni = plp->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - - plp->plp_outstanding_credits++; - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: rx=%p done\n", - libcfs_id2str(plp->plp_id), - plp->plp_credits, plp->plp_outstanding_credits, - plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits, rx); - - ptllnd_check_sends(rx->rx_peer); - - LASSERT (plni->plni_nrxs > 0); - plni->plni_nrxs--; -} - -int -ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - void **new_privatep) -{ - /* Shouldn't get here; recvs only block for router buffers */ - LBUG(); - return 0; -} - -int -ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - ptllnd_rx_t *rx = private; - int rc = 0; - int nob; - - LASSERT (kiov == NULL); - LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! 
*/ - - switch (rx->rx_msg->ptlm_type) { - default: - LBUG(); - - case PTLLND_MSG_TYPE_IMMEDIATE: - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[mlen]); - if (nob > rx->rx_nob) { - CERROR("Immediate message from %s too big: %d(%d)\n", - libcfs_id2str(rx->rx_peer->plp_id), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - lnet_copy_flat2iov(niov, iov, offset, - rx->rx_nob, rx->rx_msg, - offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload), - mlen); - lnet_finalize(ni, msg, 0); - break; - - case PTLLND_MSG_TYPE_PUT: - rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_READ, msg, - rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits, - niov, iov, offset, mlen); - break; - - case PTLLND_MSG_TYPE_GET: - if (msg != NULL) - rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, msg, - rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits, - msg->msg_niov, msg->msg_iov, - msg->msg_offset, msg->msg_len); - else - rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, NULL, - rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits, - 0, NULL, 0, 0); - break; - } - - ptllnd_rx_done(rx); - return rc; -} - -void -ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, - kptl_msg_t *msg, unsigned int nob) -{ - ptllnd_ni_t *plni = ni->ni_data; - const int basenob = offsetof(kptl_msg_t, ptlm_u); - lnet_process_id_t srcid; - ptllnd_rx_t rx; - int flip; - __u16 msg_version; - __u32 msg_cksum; - ptllnd_peer_t *plp; - int rc; - - if (nob < 6) { - CERROR("Very short receive from %s\n", - ptllnd_ptlid2str(initiator)); - return; - } - - /* I can at least read MAGIC/VERSION */ - - flip = msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC); - if (!flip && msg->ptlm_magic != PTLLND_MSG_MAGIC) { - CERROR("Bad protocol magic %08x from %s\n", - msg->ptlm_magic, ptllnd_ptlid2str(initiator)); - return; - } - - msg_version = flip ? 
__swab16(msg->ptlm_version) : msg->ptlm_version; - - if (msg_version != PTLLND_MSG_VERSION) { - CERROR("Bad protocol version %04x from %s: %04x expected\n", - (__u32)msg_version, ptllnd_ptlid2str(initiator), PTLLND_MSG_VERSION); - - if (plni->plni_abort_on_protocol_mismatch) - abort(); - - return; - } - - if (nob < basenob) { - CERROR("Short receive from %s: got %d, wanted at least %d\n", - ptllnd_ptlid2str(initiator), nob, basenob); - return; - } - - /* checksum must be computed with - * 1) ptlm_cksum zero and - * 2) BEFORE anything gets modified/flipped - */ - msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum; - msg->ptlm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != ptllnd_cksum(msg, offsetof(kptl_msg_t, ptlm_u))) { - CERROR("Bad checksum from %s\n", ptllnd_ptlid2str(initiator)); - return; - } - - msg->ptlm_version = msg_version; - msg->ptlm_cksum = msg_cksum; - - if (flip) { - /* NB stamps are opaque cookies */ - __swab32s(&msg->ptlm_nob); - __swab64s(&msg->ptlm_srcnid); - __swab64s(&msg->ptlm_dstnid); - __swab32s(&msg->ptlm_srcpid); - __swab32s(&msg->ptlm_dstpid); - } - - srcid.nid = msg->ptlm_srcnid; - srcid.pid = msg->ptlm_srcpid; - - if (LNET_NIDNET(msg->ptlm_srcnid) != LNET_NIDNET(ni->ni_nid)) { - CERROR("Bad source id %s from %s\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) { - CERROR("NAK from %s (%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - - if (plni->plni_dump_on_nak) - ptllnd_dump_debug(ni, srcid); - - if (plni->plni_abort_on_nak) - abort(); - - return; - } - - if (msg->ptlm_dstnid != ni->ni_nid || - msg->ptlm_dstpid != the_lnet.ln_pid) { - CERROR("Bad dstid %s (%s expected) from %s\n", - libcfs_id2str((lnet_process_id_t) { - .nid = msg->ptlm_dstnid, - .pid = msg->ptlm_dstpid}), - libcfs_id2str((lnet_process_id_t) { - .nid = ni->ni_nid, - .pid = the_lnet.ln_pid}), - libcfs_id2str(srcid)); - return; - } - - if (msg->ptlm_dststamp != 
plni->plni_stamp) { - CERROR("Bad dststamp "LPX64"("LPX64" expected) from %s\n", - msg->ptlm_dststamp, plni->plni_stamp, - libcfs_id2str(srcid)); - return; - } - - PTLLND_HISTORY("RX %s: %s %d %p", libcfs_id2str(srcid), - ptllnd_msgtype2str(msg->ptlm_type), - msg->ptlm_credits, &rx); - - switch (msg->ptlm_type) { - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - if (nob < basenob + sizeof(kptl_rdma_msg_t)) { - CERROR("Short rdma request from %s(%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - if (flip) - __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits); - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - if (nob < offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)) { - CERROR("Short immediate from %s(%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - break; - - case PTLLND_MSG_TYPE_HELLO: - if (nob < basenob + sizeof(kptl_hello_msg_t)) { - CERROR("Short hello from %s(%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - if(flip){ - __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits); - __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size); - } - break; - - case PTLLND_MSG_TYPE_NOOP: - break; - - default: - CERROR("Bad message type %d from %s(%s)\n", msg->ptlm_type, - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - - plp = ptllnd_find_peer(ni, srcid, 0); - if (plp == NULL) { - CERROR("Can't find peer %s\n", libcfs_id2str(srcid)); - return; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) { - if (plp->plp_recvd_hello) { - CERROR("Unexpected HELLO from %s\n", - libcfs_id2str(srcid)); - ptllnd_peer_decref(plp); - return; - } - - plp->plp_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size; - plp->plp_match = msg->ptlm_u.hello.kptlhm_matchbits; - plp->plp_stamp = msg->ptlm_srcstamp; - plp->plp_recvd_hello = 1; - - } else if (!plp->plp_recvd_hello) { - - CERROR("Bad message type %d (HELLO expected) from %s\n", - msg->ptlm_type, libcfs_id2str(srcid)); - 
ptllnd_peer_decref(plp); - return; - - } else if (msg->ptlm_srcstamp != plp->plp_stamp) { - - CERROR("Bad srcstamp "LPX64"("LPX64" expected) from %s\n", - msg->ptlm_srcstamp, plp->plp_stamp, - libcfs_id2str(srcid)); - ptllnd_peer_decref(plp); - return; - } - - /* Check peer only sends when I've sent her credits */ - if (plp->plp_sent_credits == 0) { - CERROR("%s[%d/%d+%d(%d)]: unexpected message\n", - libcfs_id2str(plp->plp_id), - plp->plp_credits, plp->plp_outstanding_credits, - plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits); - return; - } - plp->plp_sent_credits--; - - /* No check for credit overflow - the peer may post new buffers after - * the startup handshake. */ - if (msg->ptlm_credits > 0) { - plp->plp_credits += msg->ptlm_credits; - ptllnd_check_sends(plp); - } - - /* All OK so far; assume the message is good... */ - - rx.rx_peer = plp; - rx.rx_msg = msg; - rx.rx_nob = nob; - plni->plni_nrxs++; - - switch (msg->ptlm_type) { - default: /* message types have been checked already */ - ptllnd_rx_done(&rx); - break; - - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - rc = lnet_parse(ni, &msg->ptlm_u.rdma.kptlrm_hdr, - msg->ptlm_srcnid, &rx, 1); - if (rc < 0) - ptllnd_rx_done(&rx); - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - rc = lnet_parse(ni, &msg->ptlm_u.immediate.kptlim_hdr, - msg->ptlm_srcnid, &rx, 0); - if (rc < 0) - ptllnd_rx_done(&rx); - break; - } - - ptllnd_peer_decref(plp); -} - -void -ptllnd_buf_event (lnet_ni_t *ni, ptl_event_t *event) -{ - ptllnd_buffer_t *buf = ptllnd_eventarg2obj(event->md.user_ptr); - ptllnd_ni_t *plni = ni->ni_data; - char *msg = &buf->plb_buffer[event->offset]; - int repost; - int unlinked = event->type == PTL_EVENT_UNLINK; - - LASSERT (buf->plb_ni == ni); - LASSERT (event->type == PTL_EVENT_PUT_END || - event->type == PTL_EVENT_UNLINK); - - if (event->ni_fail_type != PTL_NI_OK) { - - CERROR("event type %s(%d), status %s(%d) from %s\n", - ptllnd_evtype2str(event->type), event->type, - 
ptllnd_errtype2str(event->ni_fail_type), - event->ni_fail_type, - ptllnd_ptlid2str(event->initiator)); - - } else if (event->type == PTL_EVENT_PUT_END) { -#if (PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force message alignment - someone sending an - * odd-length message could misalign subsequent messages */ - if ((event->mlength & 7) != 0) { - CERROR("Message from %s has odd length %llu: " - "probable version incompatibility\n", - ptllnd_ptlid2str(event->initiator), - event->mlength); - LBUG(); - } -#endif - LASSERT ((event->offset & 7) == 0); - - ptllnd_parse_request(ni, event->initiator, - (kptl_msg_t *)msg, event->mlength); - } - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - /* UNLINK event only on explicit unlink */ - repost = (event->unlinked && event->type != PTL_EVENT_UNLINK); - if (event->unlinked) - unlinked = 1; -#else - /* UNLINK event only on implicit unlink */ - repost = (event->type == PTL_EVENT_UNLINK); -#endif - - if (unlinked) { - LASSERT(buf->plb_posted); - buf->plb_posted = 0; - plni->plni_nposted_buffers--; - } - - if (repost) - (void) ptllnd_post_buffer(buf); -} - -void -ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx = ptllnd_eventarg2obj(event->md.user_ptr); - int error = (event->ni_fail_type != PTL_NI_OK); - int isreq; - int isbulk; -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - int unlinked = event->unlinked; -#else - int unlinked = (event->type == PTL_EVENT_UNLINK); -#endif - - if (error) - CERROR("Error %s(%d) event %s(%d) unlinked %d, %s(%d) for %s\n", - ptllnd_errtype2str(event->ni_fail_type), - event->ni_fail_type, - ptllnd_evtype2str(event->type), event->type, - unlinked, ptllnd_msgtype2str(tx->tx_type), tx->tx_type, - libcfs_id2str(tx->tx_peer->plp_id)); - - LASSERT (!PtlHandleIsEqual(event->md_handle, PTL_INVALID_HANDLE)); - - isreq = PtlHandleIsEqual(event->md_handle, tx->tx_reqmdh); - if (isreq) { - LASSERT (event->md.start == (void *)&tx->tx_msg); - if (unlinked) { - 
tx->tx_reqmdh = PTL_INVALID_HANDLE; - gettimeofday(&tx->tx_req_done, NULL); - } - } - - isbulk = PtlHandleIsEqual(event->md_handle, tx->tx_bulkmdh); - if ( isbulk && unlinked ) { - tx->tx_bulkmdh = PTL_INVALID_HANDLE; - gettimeofday(&tx->tx_bulk_done, NULL); - } - - LASSERT (!isreq != !isbulk); /* always one and only 1 match */ - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: TX done %p %s%s", - libcfs_id2str(tx->tx_peer->plp_id), - tx->tx_peer->plp_credits, - tx->tx_peer->plp_outstanding_credits, - tx->tx_peer->plp_sent_credits, - plni->plni_peer_credits + tx->tx_peer->plp_lazy_credits, - tx, isreq ? "REQ" : "BULK", unlinked ? "(unlinked)" : ""); - - LASSERT (!isreq != !isbulk); /* always one and only 1 match */ - switch (tx->tx_type) { - default: - LBUG(); - - case PTLLND_MSG_TYPE_NOOP: - case PTLLND_MSG_TYPE_HELLO: - case PTLLND_MSG_TYPE_IMMEDIATE: - LASSERT (event->type == PTL_EVENT_UNLINK || - event->type == PTL_EVENT_SEND_END); - LASSERT (isreq); - break; - - case PTLLND_MSG_TYPE_GET: - LASSERT (event->type == PTL_EVENT_UNLINK || - (isreq && event->type == PTL_EVENT_SEND_END) || - (isbulk && event->type == PTL_EVENT_PUT_END)); - - if (isbulk && !error && event->type == PTL_EVENT_PUT_END) { - /* Check GET matched */ - if (event->hdr_data == PTLLND_RDMA_OK) { - lnet_set_reply_msg_len(ni, - tx->tx_lnetreplymsg, - event->mlength); - } else { - CERROR ("Unmatched GET with %s\n", - libcfs_id2str(tx->tx_peer->plp_id)); - tx->tx_status = -EIO; - } - } - break; - - case PTLLND_MSG_TYPE_PUT: - LASSERT (event->type == PTL_EVENT_UNLINK || - (isreq && event->type == PTL_EVENT_SEND_END) || - (isbulk && event->type == PTL_EVENT_GET_END)); - break; - - case PTLLND_RDMA_READ: - LASSERT (event->type == PTL_EVENT_UNLINK || - event->type == PTL_EVENT_SEND_END || - event->type == PTL_EVENT_REPLY_END); - LASSERT (isbulk); - break; - - case PTLLND_RDMA_WRITE: - LASSERT (event->type == PTL_EVENT_UNLINK || - event->type == PTL_EVENT_SEND_END); - LASSERT (isbulk); - } - - /* Schedule 
ptllnd_tx_done() on error or last completion event */ - if (error || - (PtlHandleIsEqual(tx->tx_bulkmdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(tx->tx_reqmdh, PTL_INVALID_HANDLE))) { - if (error) - tx->tx_status = -EIO; - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &plni->plni_zombie_txs); - } -} - -ptllnd_tx_t * -ptllnd_find_timed_out_tx(ptllnd_peer_t *peer) -{ - time_t now = cfs_time_current_sec(); - struct list_head *tmp; - - list_for_each(tmp, &peer->plp_txq) { - ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - if (tx->tx_deadline < now) - return tx; - } - - list_for_each(tmp, &peer->plp_activeq) { - ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - if (tx->tx_deadline < now) - return tx; - } - - return NULL; -} - -void -ptllnd_check_peer(ptllnd_peer_t *peer) -{ - ptllnd_tx_t *tx = ptllnd_find_timed_out_tx(peer); - - if (tx == NULL) - return; - - CERROR("%s: timed out\n", libcfs_id2str(peer->plp_id)); - ptllnd_close_peer(peer, -ETIMEDOUT); -} - -void -ptllnd_watchdog (lnet_ni_t *ni, time_t now) -{ - ptllnd_ni_t *plni = ni->ni_data; - const int n = 4; - int p = plni->plni_watchdog_interval; - int chunk = plni->plni_peer_hash_size; - int interval = now - (plni->plni_watchdog_nextt - p); - int i; - struct list_head *hashlist; - struct list_head *tmp; - struct list_head *nxt; - - /* Time to check for RDMA timeouts on a few more peers: - * I try to do checks every 'p' seconds on a proportion of the peer - * table and I need to check every connection 'n' times within a - * timeout interval, to ensure I detect a timeout on any connection - * within (n+1)/n times the timeout interval. */ - - LASSERT (now >= plni->plni_watchdog_nextt); - - if (plni->plni_timeout > n * interval) { /* Scan less than the whole table? 
*/ - chunk = (chunk * n * interval) / plni->plni_timeout; - if (chunk == 0) - chunk = 1; - } - - for (i = 0; i < chunk; i++) { - hashlist = &plni->plni_peer_hash[plni->plni_watchdog_peeridx]; - - list_for_each_safe(tmp, nxt, hashlist) { - ptllnd_check_peer(list_entry(tmp, ptllnd_peer_t, plp_list)); - } - - plni->plni_watchdog_peeridx = (plni->plni_watchdog_peeridx + 1) % - plni->plni_peer_hash_size; - } - - plni->plni_watchdog_nextt = now + p; -} - -void -ptllnd_wait (lnet_ni_t *ni, int milliseconds) -{ - static struct timeval prevt; - static int prevt_count; - static int call_count; - - struct timeval start; - struct timeval then; - struct timeval now; - struct timeval deadline; - - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - ptl_event_t event; - int which; - int rc; - int found = 0; - int timeout = 0; - - /* Handle any currently queued events, returning immediately if any. - * Otherwise block for the timeout and handle all events queued - * then. */ - - gettimeofday(&start, NULL); - call_count++; - - if (milliseconds <= 0) { - deadline = start; - } else { - deadline.tv_sec = start.tv_sec + milliseconds/1000; - deadline.tv_usec = start.tv_usec + (milliseconds % 1000)*1000; - - if (deadline.tv_usec >= 1000000) { - start.tv_usec -= 1000000; - start.tv_sec++; - } - } - - for (;;) { - gettimeofday(&then, NULL); - - rc = PtlEQPoll(&plni->plni_eqh, 1, timeout, &event, &which); - - gettimeofday(&now, NULL); - - if ((now.tv_sec*1000 + now.tv_usec/1000) - - (then.tv_sec*1000 + then.tv_usec/1000) > timeout + 1000) { - /* 1000 mS grace...........................^ */ - CERROR("SLOW PtlEQPoll(%d): %dmS elapsed\n", timeout, - (int)(now.tv_sec*1000 + now.tv_usec/1000) - - (int)(then.tv_sec*1000 + then.tv_usec/1000)); - } - - if (rc == PTL_EQ_EMPTY) { - if (found) /* handled some events */ - break; - - if (now.tv_sec >= plni->plni_watchdog_nextt) { /* check timeouts? 
*/ - ptllnd_watchdog(ni, now.tv_sec); - LASSERT (now.tv_sec < plni->plni_watchdog_nextt); - } - - if (now.tv_sec > deadline.tv_sec || /* timeout expired */ - (now.tv_sec == deadline.tv_sec && - now.tv_usec >= deadline.tv_usec)) - break; - - if (milliseconds < 0 || - plni->plni_watchdog_nextt <= deadline.tv_sec) { - timeout = (plni->plni_watchdog_nextt - now.tv_sec)*1000; - } else { - timeout = (deadline.tv_sec - now.tv_sec)*1000 + - (deadline.tv_usec - now.tv_usec)/1000; - } - - continue; - } - - LASSERT (rc == PTL_OK || rc == PTL_EQ_DROPPED); - - if (rc == PTL_EQ_DROPPED) - CERROR("Event queue: size %d is too small\n", - plni->plni_eq_size); - - timeout = 0; - found = 1; - - switch (ptllnd_eventarg2type(event.md.user_ptr)) { - default: - LBUG(); - - case PTLLND_EVENTARG_TYPE_TX: - ptllnd_tx_event(ni, &event); - break; - - case PTLLND_EVENTARG_TYPE_BUF: - ptllnd_buf_event(ni, &event); - break; - } - } - - while (!list_empty(&plni->plni_zombie_txs)) { - tx = list_entry(plni->plni_zombie_txs.next, - ptllnd_tx_t, tx_list); - list_del_init(&tx->tx_list); - ptllnd_tx_done(tx); - } - - if (prevt.tv_sec == 0 || - prevt.tv_sec != now.tv_sec) { - PTLLND_HISTORY("%d wait entered at %d.%06d - prev %d %d.%06d", - call_count, (int)start.tv_sec, (int)start.tv_usec, - prevt_count, (int)prevt.tv_sec, (int)prevt.tv_usec); - prevt = now; - } -} diff --git a/lnet/ulnds/socklnd/.cvsignore b/lnet/ulnds/socklnd/.cvsignore deleted file mode 100644 index e9955884756af11fe171e89bf99e459ac44f1a2a..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/ulnds/socklnd/Makefile.am b/lnet/ulnds/socklnd/Makefile.am deleted file mode 100644 index d94443cc2deb8e42a96f861f0b18319d69ec9724..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -if LIBLUSTRE -if BUILD_USOCKLND -noinst_LIBRARIES = libsocklnd.a -endif -endif - -noinst_HEADERS = 
usocklnd.h -libsocklnd_a_SOURCES = usocklnd.h usocklnd.c usocklnd_cb.c poll.c \ - handlers.c conn.c -libsocklnd_a_CPPFLAGS = $(LLCPPFLAGS) -libsocklnd_a_CFLAGS = $(LLCFLAGS) diff --git a/lnet/ulnds/socklnd/conn.c b/lnet/ulnds/socklnd/conn.c deleted file mode 100644 index 80a0779f20ce6734f0f36e363c398c623d755d43..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/conn.c +++ /dev/null @@ -1,1081 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Maxim Patlasov <maxim@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - */ - -#include "usocklnd.h" - -/* Return 1 if the conn is timed out, 0 else */ -int -usocklnd_conn_timed_out(usock_conn_t *conn, cfs_time_t current_time) -{ - if (conn->uc_tx_flag && /* sending is in progress */ - cfs_time_aftereq(current_time, conn->uc_tx_deadline)) - return 1; - - if (conn->uc_rx_flag && /* receiving is in progress */ - cfs_time_aftereq(current_time, conn->uc_rx_deadline)) - return 1; - - return 0; -} - -void -usocklnd_conn_kill(usock_conn_t *conn) -{ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) - usocklnd_conn_kill_locked(conn); - pthread_mutex_unlock(&conn->uc_lock); -} - -/* Mark the conn as DEAD and schedule its deletion */ -void -usocklnd_conn_kill_locked(usock_conn_t *conn) -{ - conn->uc_rx_flag = conn->uc_tx_flag = 0; - conn->uc_state = UC_DEAD; - usocklnd_add_killrequest(conn); -} - -usock_conn_t * -usocklnd_conn_allocate() -{ - usock_conn_t *conn; - usock_pollrequest_t *pr; - - LIBCFS_ALLOC (pr, sizeof(*pr)); - if (pr == NULL) - return NULL; - - LIBCFS_ALLOC (conn, sizeof(*conn)); - if (conn == NULL) { - LIBCFS_FREE (pr, sizeof(*pr)); - return NULL; - } - memset(conn, 0, sizeof(*conn)); - conn->uc_preq = pr; - - LIBCFS_ALLOC (conn->uc_rx_hello, - 
offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - if (conn->uc_rx_hello == NULL) { - LIBCFS_FREE (pr, sizeof(*pr)); - LIBCFS_FREE (conn, sizeof(*conn)); - return NULL; - } - - return conn; -} - -void -usocklnd_conn_free(usock_conn_t *conn) -{ - usock_pollrequest_t *pr = conn->uc_preq; - - if (pr != NULL) - LIBCFS_FREE (pr, sizeof(*pr)); - - if (conn->uc_rx_hello != NULL) - LIBCFS_FREE (conn->uc_rx_hello, - offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - - LIBCFS_FREE (conn, sizeof(*conn)); -} - -void -usocklnd_tear_peer_conn(usock_conn_t *conn) -{ - usock_peer_t *peer = conn->uc_peer; - int idx = usocklnd_type2idx(conn->uc_type); - lnet_ni_t *ni; - lnet_process_id_t id; - int decref_flag = 0; - int killall_flag = 0; - - if (peer == NULL) /* nothing to tear */ - return; - - pthread_mutex_lock(&peer->up_lock); - pthread_mutex_lock(&conn->uc_lock); - - ni = peer->up_ni; - id = peer->up_peerid; - - if (peer->up_conns[idx] == conn) { - if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) { - /* change state not to finalize twice */ - conn->uc_rx_state = UC_RX_KSM_HEADER; - lnet_finalize(peer->up_ni, conn->uc_rx_lnetmsg, -EIO); - } - - usocklnd_destroy_txlist(peer->up_ni, - &conn->uc_tx_list); - - peer->up_conns[idx] = NULL; - conn->uc_peer = NULL; - decref_flag = 1; - - if(conn->uc_errored && !peer->up_errored) - peer->up_errored = killall_flag = 1; - } - - pthread_mutex_unlock(&conn->uc_lock); - - if (killall_flag) - usocklnd_del_conns_locked(peer); - - pthread_mutex_unlock(&peer->up_lock); - - if (!decref_flag) - return; - - usocklnd_conn_decref(conn); - usocklnd_peer_decref(peer); - - usocklnd_check_peer_stale(ni, id); -} - -/* Remove peer from hash list if all up_conns[i] is NULL && - * hash table is the only consumer of the peer */ -void -usocklnd_check_peer_stale(lnet_ni_t *ni, lnet_process_id_t id) -{ - usock_peer_t *peer; - - pthread_rwlock_wrlock(&usock_data.ud_peers_lock); - peer = usocklnd_find_peer_locked(ni, id); - - if (peer == 
NULL) { - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - return; - } - - if (cfs_atomic_read(&peer->up_refcount) == 2) { - int i; - for (i = 0; i < N_CONN_TYPES; i++) - LASSERT (peer->up_conns[i] == NULL); - - list_del(&peer->up_list); - - if (peer->up_errored && - (peer->up_peerid.pid & LNET_PID_USERFLAG) == 0) - lnet_notify (peer->up_ni, peer->up_peerid.nid, 0, - cfs_time_seconds(peer->up_last_alive)); - - usocklnd_peer_decref(peer); - } - - usocklnd_peer_decref(peer); - pthread_rwlock_unlock(&usock_data.ud_peers_lock); -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_create_passive_conn(lnet_ni_t *ni, int fd, usock_conn_t **connp) -{ - int rc; - __u32 peer_ip; - __u16 peer_port; - usock_conn_t *conn; - - rc = libcfs_getpeername(fd, &peer_ip, &peer_port); - if (rc) - return rc; - - rc = usocklnd_set_sock_options(fd); - if (rc) - return rc; - - conn = usocklnd_conn_allocate(); - if (conn == NULL) - return -ENOMEM; - - usocklnd_rx_hellomagic_state_transition(conn); - - conn->uc_fd = fd; - conn->uc_peer_ip = peer_ip; - conn->uc_peer_port = peer_port; - conn->uc_state = UC_RECEIVING_HELLO; - conn->uc_pt_idx = usocklnd_ip2pt_idx(peer_ip); - conn->uc_ni = ni; - CFS_INIT_LIST_HEAD (&conn->uc_tx_list); - CFS_INIT_LIST_HEAD (&conn->uc_zcack_list); - pthread_mutex_init(&conn->uc_lock, NULL); - cfs_atomic_set(&conn->uc_refcount, 1); /* 1 ref for me */ - - *connp = conn; - return 0; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_create_active_conn(usock_peer_t *peer, int type, - usock_conn_t **connp) -{ - int rc; - int fd; - usock_conn_t *conn; - __u32 dst_ip = LNET_NIDADDR(peer->up_peerid.nid); - __u16 dst_port = lnet_acceptor_port(); - - conn = usocklnd_conn_allocate(); - if (conn == NULL) - return -ENOMEM; - - conn->uc_tx_hello = usocklnd_create_cr_hello_tx(peer->up_ni, type, - peer->up_peerid.nid); - if (conn->uc_tx_hello == NULL) { - usocklnd_conn_free(conn); - return -ENOMEM; - } - - if (the_lnet.ln_pid & LNET_PID_USERFLAG) - rc = 
usocklnd_connect_cli_mode(&fd, dst_ip, dst_port); - else - rc = usocklnd_connect_srv_mode(&fd, dst_ip, dst_port); - - if (rc) { - usocklnd_destroy_tx(NULL, conn->uc_tx_hello); - usocklnd_conn_free(conn); - return rc; - } - - conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - - conn->uc_fd = fd; - conn->uc_peer_ip = dst_ip; - conn->uc_peer_port = dst_port; - conn->uc_type = type; - conn->uc_activeflag = 1; - conn->uc_state = UC_CONNECTING; - conn->uc_pt_idx = usocklnd_ip2pt_idx(dst_ip); - conn->uc_ni = NULL; - conn->uc_peerid = peer->up_peerid; - conn->uc_peer = peer; - usocklnd_peer_addref(peer); - CFS_INIT_LIST_HEAD (&conn->uc_tx_list); - CFS_INIT_LIST_HEAD (&conn->uc_zcack_list); - pthread_mutex_init(&conn->uc_lock, NULL); - cfs_atomic_set(&conn->uc_refcount, 1); /* 1 ref for me */ - - *connp = conn; - return 0; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_connect_srv_mode(int *fdp, __u32 dst_ip, __u16 dst_port) -{ - __u16 port; - int fd; - int rc; - - for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT; - port >= LNET_ACCEPTOR_MIN_RESERVED_PORT; - port--) { - /* Iterate through reserved ports. 
*/ - - rc = libcfs_sock_create(&fd); - if (rc) - return rc; - - rc = libcfs_sock_bind_to_port(fd, port); - if (rc) { - close(fd); - continue; - } - - rc = usocklnd_set_sock_options(fd); - if (rc) { - close(fd); - return rc; - } - - rc = libcfs_sock_connect(fd, dst_ip, dst_port); - if (rc == 0) { - *fdp = fd; - return 0; - } - - if (rc != -EADDRINUSE && rc != -EADDRNOTAVAIL) { - close(fd); - return rc; - } - - close(fd); - } - - CERROR("Can't bind to any reserved port\n"); - return rc; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_connect_cli_mode(int *fdp, __u32 dst_ip, __u16 dst_port) -{ - int fd; - int rc; - - rc = libcfs_sock_create(&fd); - if (rc) - return rc; - - rc = usocklnd_set_sock_options(fd); - if (rc) { - close(fd); - return rc; - } - - rc = libcfs_sock_connect(fd, dst_ip, dst_port); - if (rc) { - close(fd); - return rc; - } - - *fdp = fd; - return 0; -} - -int -usocklnd_set_sock_options(int fd) -{ - int rc; - - rc = libcfs_sock_set_nagle(fd, usock_tuns.ut_socknagle); - if (rc) - return rc; - - if (usock_tuns.ut_sockbufsiz) { - rc = libcfs_sock_set_bufsiz(fd, usock_tuns.ut_sockbufsiz); - if (rc) - return rc; - } - - return libcfs_fcntl_nonblock(fd); -} - -void -usocklnd_init_msg(ksock_msg_t *msg, int type) -{ - msg->ksm_type = type; - msg->ksm_csum = 0; - msg->ksm_zc_req_cookie = 0; - msg->ksm_zc_ack_cookie = 0; -} - -usock_tx_t * -usocklnd_create_noop_tx(__u64 cookie) -{ - usock_tx_t *tx; - - LIBCFS_ALLOC (tx, sizeof(usock_tx_t)); - if (tx == NULL) - return NULL; - - tx->tx_size = sizeof(usock_tx_t); - tx->tx_lnetmsg = NULL; - - usocklnd_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP); - tx->tx_msg.ksm_zc_ack_cookie = cookie; - - tx->tx_iova[0].iov_base = (void *)&tx->tx_msg; - tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob = - offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_iov = tx->tx_iova; - tx->tx_niov = 1; - - return tx; -} - -usock_tx_t * -usocklnd_create_tx(lnet_msg_t *lntmsg) -{ - usock_tx_t *tx; - unsigned int payload_niov = 
lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - int size = offsetof(usock_tx_t, - tx_iova[1 + payload_niov]); - - LIBCFS_ALLOC (tx, size); - if (tx == NULL) - return NULL; - - tx->tx_size = size; - tx->tx_lnetmsg = lntmsg; - - tx->tx_resid = tx->tx_nob = - offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload) + - payload_nob; - - usocklnd_init_msg(&tx->tx_msg, KSOCK_MSG_LNET); - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = lntmsg->msg_hdr; - tx->tx_iova[0].iov_base = (void *)&tx->tx_msg; - tx->tx_iova[0].iov_len = offsetof(ksock_msg_t, - ksm_u.lnetmsg.ksnm_payload); - tx->tx_iov = tx->tx_iova; - - tx->tx_niov = 1 + - lnet_extract_iov(payload_niov, &tx->tx_iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); - - return tx; -} - -void -usocklnd_init_hello_msg(ksock_hello_msg_t *hello, - lnet_ni_t *ni, int type, lnet_nid_t peer_nid) -{ - usock_net_t *net = (usock_net_t *)ni->ni_data; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = KSOCK_PROTO_V2; - hello->kshm_nips = 0; - hello->kshm_ctype = type; - - hello->kshm_dst_incarnation = 0; /* not used */ - hello->kshm_src_incarnation = net->un_incarnation; - - hello->kshm_src_pid = the_lnet.ln_pid; - hello->kshm_src_nid = ni->ni_nid; - hello->kshm_dst_nid = peer_nid; - hello->kshm_dst_pid = 0; /* not used */ -} - -usock_tx_t * -usocklnd_create_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid) -{ - usock_tx_t *tx; - int size; - ksock_hello_msg_t *hello; - - size = sizeof(usock_tx_t) + offsetof(ksock_hello_msg_t, kshm_ips); - LIBCFS_ALLOC (tx, size); - if (tx == NULL) - return NULL; - - tx->tx_size = size; - tx->tx_lnetmsg = NULL; - - hello = (ksock_hello_msg_t *)&tx->tx_iova[1]; - usocklnd_init_hello_msg(hello, ni, type, peer_nid); - - tx->tx_iova[0].iov_base = (void *)hello; - tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob = - offsetof(ksock_hello_msg_t, kshm_ips); - tx->tx_iov 
= tx->tx_iova; - tx->tx_niov = 1; - - return tx; -} - -usock_tx_t * -usocklnd_create_cr_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid) -{ - usock_tx_t *tx; - int size; - lnet_acceptor_connreq_t *cr; - ksock_hello_msg_t *hello; - - size = sizeof(usock_tx_t) + - sizeof(lnet_acceptor_connreq_t) + - offsetof(ksock_hello_msg_t, kshm_ips); - LIBCFS_ALLOC (tx, size); - if (tx == NULL) - return NULL; - - tx->tx_size = size; - tx->tx_lnetmsg = NULL; - - cr = (lnet_acceptor_connreq_t *)&tx->tx_iova[1]; - memset(cr, 0, sizeof(*cr)); - cr->acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr->acr_version = LNET_PROTO_ACCEPTOR_VERSION; - cr->acr_nid = peer_nid; - - hello = (ksock_hello_msg_t *)((char *)cr + sizeof(*cr)); - usocklnd_init_hello_msg(hello, ni, type, peer_nid); - - tx->tx_iova[0].iov_base = (void *)cr; - tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob = - sizeof(lnet_acceptor_connreq_t) + - offsetof(ksock_hello_msg_t, kshm_ips); - tx->tx_iov = tx->tx_iova; - tx->tx_niov = 1; - - return tx; -} - -void -usocklnd_destroy_tx(lnet_ni_t *ni, usock_tx_t *tx) -{ - lnet_msg_t *lnetmsg = tx->tx_lnetmsg; - int rc = (tx->tx_resid == 0) ? 
0 : -EIO; - - LASSERT (ni != NULL || lnetmsg == NULL); - - LIBCFS_FREE (tx, tx->tx_size); - - if (lnetmsg != NULL) /* NOOP and hello go without lnetmsg */ - lnet_finalize(ni, lnetmsg, rc); -} - -void -usocklnd_destroy_txlist(lnet_ni_t *ni, struct list_head *txlist) -{ - usock_tx_t *tx; - - while (!list_empty(txlist)) { - tx = list_entry(txlist->next, usock_tx_t, tx_list); - list_del(&tx->tx_list); - - usocklnd_destroy_tx(ni, tx); - } -} - -void -usocklnd_destroy_zcack_list(struct list_head *zcack_list) -{ - usock_zc_ack_t *zcack; - - while (!list_empty(zcack_list)) { - zcack = list_entry(zcack_list->next, usock_zc_ack_t, zc_list); - list_del(&zcack->zc_list); - - LIBCFS_FREE (zcack, sizeof(*zcack)); - } -} - -void -usocklnd_destroy_peer(usock_peer_t *peer) -{ - usock_net_t *net = peer->up_ni->ni_data; - int i; - - for (i = 0; i < N_CONN_TYPES; i++) - LASSERT (peer->up_conns[i] == NULL); - - LIBCFS_FREE (peer, sizeof (*peer)); - - pthread_mutex_lock(&net->un_lock); - if(--net->un_peercount == 0) - pthread_cond_signal(&net->un_cond); - pthread_mutex_unlock(&net->un_lock); -} - -void -usocklnd_destroy_conn(usock_conn_t *conn) -{ - LASSERT (conn->uc_peer == NULL || conn->uc_ni == NULL); - - if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) { - LASSERT (conn->uc_peer != NULL); - lnet_finalize(conn->uc_peer->up_ni, conn->uc_rx_lnetmsg, -EIO); - } - - if (!list_empty(&conn->uc_tx_list)) { - LASSERT (conn->uc_peer != NULL); - usocklnd_destroy_txlist(conn->uc_peer->up_ni, &conn->uc_tx_list); - } - - usocklnd_destroy_zcack_list(&conn->uc_zcack_list); - - if (conn->uc_peer != NULL) - usocklnd_peer_decref(conn->uc_peer); - - if (conn->uc_ni != NULL) - lnet_ni_decref(conn->uc_ni); - - if (conn->uc_tx_hello) - usocklnd_destroy_tx(NULL, conn->uc_tx_hello); - - usocklnd_conn_free(conn); -} - -int -usocklnd_get_conn_type(lnet_msg_t *lntmsg) -{ - int nob; - - if (the_lnet.ln_pid & LNET_PID_USERFLAG) - return SOCKLND_CONN_ANY; - - nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload) + 
- lntmsg->msg_len; - - if (nob >= usock_tuns.ut_min_bulk) - return SOCKLND_CONN_BULK_OUT; - else - return SOCKLND_CONN_CONTROL; -} - -int usocklnd_type2idx(int type) -{ - switch (type) { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return 0; - case SOCKLND_CONN_BULK_IN: - return 1; - case SOCKLND_CONN_BULK_OUT: - return 2; - default: - LBUG(); - } -} - -usock_peer_t * -usocklnd_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id) -{ - struct list_head *peer_list = usocklnd_nid2peerlist(id.nid); - struct list_head *tmp; - usock_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, usock_peer_t, up_list); - - if (peer->up_ni != ni) - continue; - - if (peer->up_peerid.nid != id.nid || - peer->up_peerid.pid != id.pid) - continue; - - usocklnd_peer_addref(peer); - return peer; - } - return (NULL); -} - -int -usocklnd_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp) -{ - usock_net_t *net = ni->ni_data; - usock_peer_t *peer; - int i; - - LIBCFS_ALLOC (peer, sizeof (*peer)); - if (peer == NULL) - return -ENOMEM; - - for (i = 0; i < N_CONN_TYPES; i++) - peer->up_conns[i] = NULL; - - peer->up_peerid = id; - peer->up_ni = ni; - peer->up_incrn_is_set = 0; - peer->up_errored = 0; - peer->up_last_alive = 0; - cfs_atomic_set (&peer->up_refcount, 1); /* 1 ref for caller */ - pthread_mutex_init(&peer->up_lock, NULL); - - pthread_mutex_lock(&net->un_lock); - net->un_peercount++; - pthread_mutex_unlock(&net->un_lock); - - *peerp = peer; - return 0; -} - -/* Safely create new peer if needed. Save result in *peerp. 
- * Returns 0 on success, <0 else */ -int -usocklnd_find_or_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp) -{ - int rc; - usock_peer_t *peer; - usock_peer_t *peer2; - usock_net_t *net = ni->ni_data; - - pthread_rwlock_rdlock(&usock_data.ud_peers_lock); - peer = usocklnd_find_peer_locked(ni, id); - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - - if (peer != NULL) - goto find_or_create_peer_done; - - rc = usocklnd_create_peer(ni, id, &peer); - if (rc) - return rc; - - pthread_rwlock_wrlock(&usock_data.ud_peers_lock); - peer2 = usocklnd_find_peer_locked(ni, id); - if (peer2 == NULL) { - if (net->un_shutdown) { - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - usocklnd_peer_decref(peer); /* should destroy peer */ - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - /* peer table will take 1 of my refs on peer */ - usocklnd_peer_addref(peer); - list_add_tail (&peer->up_list, - usocklnd_nid2peerlist(id.nid)); - } else { - usocklnd_peer_decref(peer); /* should destroy peer */ - peer = peer2; - } - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - - find_or_create_peer_done: - *peerp = peer; - return 0; -} - -/* NB: both peer and conn locks are held */ -static int -usocklnd_enqueue_zcack(usock_conn_t *conn, usock_zc_ack_t *zc_ack) -{ - if (conn->uc_state == UC_READY && - list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list) && - !conn->uc_sending) { - int rc = usocklnd_add_pollrequest(conn, POLL_TX_SET_REQUEST, - POLLOUT); - if (rc != 0) - return rc; - } - - list_add_tail(&zc_ack->zc_list, &conn->uc_zcack_list); - return 0; -} - -/* NB: both peer and conn locks are held - * NB: if sending isn't in progress. 
the caller *MUST* send tx - * immediately after we'll return */ -static void -usocklnd_enqueue_tx(usock_conn_t *conn, usock_tx_t *tx, - int *send_immediately) -{ - if (conn->uc_state == UC_READY && - list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list) && - !conn->uc_sending) { - conn->uc_sending = 1; - *send_immediately = 1; - return; - } - - *send_immediately = 0; - list_add_tail(&tx->tx_list, &conn->uc_tx_list); -} - -/* Safely create new conn if needed. Save result in *connp. - * Returns 0 on success, <0 else */ -int -usocklnd_find_or_create_conn(usock_peer_t *peer, int type, - usock_conn_t **connp, - usock_tx_t *tx, usock_zc_ack_t *zc_ack, - int *send_immediately) -{ - usock_conn_t *conn; - int idx; - int rc; - lnet_pid_t userflag = peer->up_peerid.pid & LNET_PID_USERFLAG; - - if (userflag) - type = SOCKLND_CONN_ANY; - - idx = usocklnd_type2idx(type); - - pthread_mutex_lock(&peer->up_lock); - if (peer->up_conns[idx] != NULL) { - conn = peer->up_conns[idx]; - LASSERT(conn->uc_type == type); - } else { - if (userflag) { - CERROR("Refusing to create a connection to " - "userspace process %s\n", - libcfs_id2str(peer->up_peerid)); - rc = -EHOSTUNREACH; - goto find_or_create_conn_failed; - } - - rc = usocklnd_create_active_conn(peer, type, &conn); - if (rc) { - peer->up_errored = 1; - usocklnd_del_conns_locked(peer); - goto find_or_create_conn_failed; - } - - /* peer takes 1 of conn refcount */ - usocklnd_link_conn_to_peer(conn, peer, idx); - - rc = usocklnd_add_pollrequest(conn, POLL_ADD_REQUEST, POLLOUT); - if (rc) { - peer->up_conns[idx] = NULL; - usocklnd_conn_decref(conn); /* should destroy conn */ - goto find_or_create_conn_failed; - } - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - } - - pthread_mutex_lock(&conn->uc_lock); - LASSERT(conn->uc_peer == peer); - - LASSERT(tx == NULL || zc_ack == NULL); - if (tx != NULL) { - usocklnd_enqueue_tx(conn, tx, send_immediately); - } else { - rc = usocklnd_enqueue_zcack(conn, zc_ack); - if (rc != 0) { - 
usocklnd_conn_kill_locked(conn); - pthread_mutex_unlock(&conn->uc_lock); - goto find_or_create_conn_failed; - } - } - pthread_mutex_unlock(&conn->uc_lock); - - usocklnd_conn_addref(conn); - pthread_mutex_unlock(&peer->up_lock); - - *connp = conn; - return 0; - - find_or_create_conn_failed: - pthread_mutex_unlock(&peer->up_lock); - return rc; -} - -void -usocklnd_link_conn_to_peer(usock_conn_t *conn, usock_peer_t *peer, int idx) -{ - peer->up_conns[idx] = conn; - peer->up_errored = 0; /* this new fresh conn will try - * revitalize even stale errored peer */ -} - -int -usocklnd_invert_type(int type) -{ - switch (type) - { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return (type); - case SOCKLND_CONN_BULK_IN: - return SOCKLND_CONN_BULK_OUT; - case SOCKLND_CONN_BULK_OUT: - return SOCKLND_CONN_BULK_IN; - default: - return SOCKLND_CONN_NONE; - } -} - -void -usocklnd_conn_new_state(usock_conn_t *conn, int new_state) -{ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) - conn->uc_state = new_state; - pthread_mutex_unlock(&conn->uc_lock); -} - -/* NB: peer is locked by caller */ -void -usocklnd_cleanup_stale_conns(usock_peer_t *peer, __u64 incrn, - usock_conn_t *skip_conn) -{ - int i; - - if (!peer->up_incrn_is_set) { - peer->up_incarnation = incrn; - peer->up_incrn_is_set = 1; - return; - } - - if (peer->up_incarnation == incrn) - return; - - peer->up_incarnation = incrn; - - for (i = 0; i < N_CONN_TYPES; i++) { - usock_conn_t *conn = peer->up_conns[i]; - - if (conn == NULL || conn == skip_conn) - continue; - - pthread_mutex_lock(&conn->uc_lock); - LASSERT (conn->uc_peer == peer); - conn->uc_peer = NULL; - peer->up_conns[i] = NULL; - if (conn->uc_state != UC_DEAD) - usocklnd_conn_kill_locked(conn); - pthread_mutex_unlock(&conn->uc_lock); - - usocklnd_conn_decref(conn); - usocklnd_peer_decref(peer); - } -} - -/* RX state transition to UC_RX_HELLO_MAGIC: update RX part to receive - * MAGIC part of hello and set uc_rx_state - */ -void 
-usocklnd_rx_hellomagic_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_magic; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - sizeof(conn->uc_rx_hello->kshm_magic); - - conn->uc_rx_state = UC_RX_HELLO_MAGIC; - - conn->uc_rx_flag = 1; /* waiting for incoming hello */ - conn->uc_rx_deadline = cfs_time_shift(usock_tuns.ut_timeout); -} - -/* RX state transition to UC_RX_HELLO_VERSION: update RX part to receive - * VERSION part of hello and set uc_rx_state - */ -void -usocklnd_rx_helloversion_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_version; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - sizeof(conn->uc_rx_hello->kshm_version); - - conn->uc_rx_state = UC_RX_HELLO_VERSION; -} - -/* RX state transition to UC_RX_HELLO_BODY: update RX part to receive - * the rest of hello and set uc_rx_state - */ -void -usocklnd_rx_hellobody_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_src_nid; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - offsetof(ksock_hello_msg_t, kshm_ips) - - offsetof(ksock_hello_msg_t, kshm_src_nid); - - conn->uc_rx_state = UC_RX_HELLO_BODY; -} - -/* RX state transition to UC_RX_HELLO_IPS: update RX part to receive - * array of IPs and set uc_rx_state - */ -void -usocklnd_rx_helloIPs_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_ips; - 
conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - conn->uc_rx_hello->kshm_nips * - sizeof(conn->uc_rx_hello->kshm_ips[0]); - - conn->uc_rx_state = UC_RX_HELLO_IPS; -} - -/* RX state transition to UC_RX_LNET_HEADER: update RX part to receive - * LNET header and set uc_rx_state - */ -void -usocklnd_rx_lnethdr_state_transition(usock_conn_t *conn) -{ - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_msg.ksm_u.lnetmsg; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - sizeof(ksock_lnet_msg_t); - - conn->uc_rx_state = UC_RX_LNET_HEADER; - conn->uc_rx_flag = 1; -} - -/* RX state transition to UC_RX_KSM_HEADER: update RX part to receive - * KSM header and set uc_rx_state - */ -void -usocklnd_rx_ksmhdr_state_transition(usock_conn_t *conn) -{ - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_msg; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - offsetof(ksock_msg_t, ksm_u); - - conn->uc_rx_state = UC_RX_KSM_HEADER; - conn->uc_rx_flag = 0; -} - -/* RX state transition to UC_RX_SKIPPING: update RX part for - * skipping and set uc_rx_state - */ -void -usocklnd_rx_skipping_state_transition(usock_conn_t *conn) -{ - static char skip_buffer[4096]; - - int nob; - unsigned int niov = 0; - int skipped = 0; - int nob_to_skip = conn->uc_rx_nob_left; - - LASSERT(nob_to_skip != 0); - - conn->uc_rx_iov = conn->uc_rx_iova; - - /* Set up to skip as much as possible now. 
If there's more left - * (ran out of iov entries) we'll get called again */ - - do { - nob = MIN (nob_to_skip, sizeof(skip_buffer)); - - conn->uc_rx_iov[niov].iov_base = skip_buffer; - conn->uc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->uc_rx_iova) / sizeof (struct iovec)); - - conn->uc_rx_niov = niov; - conn->uc_rx_nob_wanted = skipped; - - conn->uc_rx_state = UC_RX_SKIPPING; -} diff --git a/lnet/ulnds/socklnd/handlers.c b/lnet/ulnds/socklnd/handlers.c deleted file mode 100644 index 63c6d248d2ea2ebe2bcddb2a9c4de78b3b004581..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/handlers.c +++ /dev/null @@ -1,1005 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Maxim Patlasov <maxim@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. 
- * - */ - -#include "usocklnd.h" -#include <unistd.h> -#include <sys/syscall.h> - -int -usocklnd_notifier_handler(int fd) -{ - int notification; - return syscall(SYS_read, fd, ¬ification, sizeof(notification)); -} - -void -usocklnd_exception_handler(usock_conn_t *conn) -{ - pthread_mutex_lock(&conn->uc_lock); - - if (conn->uc_state == UC_CONNECTING || - conn->uc_state == UC_SENDING_HELLO) - usocklnd_conn_kill_locked(conn); - - pthread_mutex_unlock(&conn->uc_lock); -} - -int -usocklnd_read_handler(usock_conn_t *conn) -{ - int rc; - int continue_reading; - int state; - - read_again: - rc = 0; - pthread_mutex_lock(&conn->uc_lock); - state = conn->uc_state; - - /* process special case: LNET calls lnd_recv() asyncronously */ - if (state == UC_READY && conn->uc_rx_state == UC_RX_PARSE) { - /* still don't have usocklnd_recv() called */ - rc = usocklnd_add_pollrequest(conn, POLL_RX_SET_REQUEST, 0); - if (rc == 0) - conn->uc_rx_state = UC_RX_PARSE_WAIT; - else - usocklnd_conn_kill_locked(conn); - - pthread_mutex_unlock(&conn->uc_lock); - return rc; - } - - pthread_mutex_unlock(&conn->uc_lock); - /* From here and below the conn cannot be changed - * asyncronously, except: - * 1) usocklnd_send() can work with uc_tx_list and uc_zcack_list, - * 2) usocklnd_shutdown() can change uc_state to UC_DEAD */ - - switch (state) { - - case UC_RECEIVING_HELLO: - case UC_READY: - if (conn->uc_rx_nob_wanted != 0) { - /* read from conn fd as much wanted data as possible */ - rc = usocklnd_read_data(conn); - if (rc == 0) /* partial read */ - break; - if (rc < 0) {/* error happened or EOF */ - usocklnd_conn_kill(conn); - break; - } - } - - /* process incoming data */ - if (state == UC_READY ) - rc = usocklnd_read_msg(conn, &continue_reading); - else /* state == UC_RECEIVING_HELLO */ - rc = usocklnd_read_hello(conn, &continue_reading); - - if (rc < 0) { - usocklnd_conn_kill(conn); - break; - } - - if (continue_reading) - goto read_again; - - break; - - case UC_DEAD: - break; - - default: - 
LBUG(); - } - - return rc; -} - -/* Switch on rx_state. - * Return 0 on success, 1 if whole packet is read, else return <0 - * Always set cont_flag: 1 if we're ready to continue reading, else 0 - * NB: If whole packet is read, cont_flag will be set to zero to take - * care of fairess - */ -int -usocklnd_read_msg(usock_conn_t *conn, int *cont_flag) -{ - int rc = 0; - __u64 cookie; - - *cont_flag = 0; - - /* smth. new emerged in RX part - let's process it */ - switch (conn->uc_rx_state) { - case UC_RX_KSM_HEADER: - if (conn->uc_flip) { - __swab32s(&conn->uc_rx_msg.ksm_type); - __swab32s(&conn->uc_rx_msg.ksm_csum); - __swab64s(&conn->uc_rx_msg.ksm_zc_req_cookie); - __swab64s(&conn->uc_rx_msg.ksm_zc_ack_cookie); - } - - /* we never send packets for wich zc-acking is required */ - if (conn->uc_rx_msg.ksm_type != KSOCK_MSG_LNET || - conn->uc_rx_msg.ksm_zc_ack_cookie != 0) { - conn->uc_errored = 1; - return -EPROTO; - } - - /* zc_req will be processed later, when - lnet payload will be received */ - - usocklnd_rx_lnethdr_state_transition(conn); - *cont_flag = 1; - break; - - case UC_RX_LNET_HEADER: - if (the_lnet.ln_pid & LNET_PID_USERFLAG) { - /* replace dest_nid,pid (ksocknal sets its own) */ - conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr.dest_nid = - cpu_to_le64(conn->uc_peer->up_ni->ni_nid); - conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr.dest_pid = - cpu_to_le32(the_lnet.ln_pid); - - } else if (conn->uc_peer->up_peerid.pid & LNET_PID_USERFLAG) { - /* Userspace peer */ - lnet_process_id_t *id = &conn->uc_peer->up_peerid; - lnet_hdr_t *lhdr = &conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr; - - /* Substitute process ID assigned at connection time */ - lhdr->src_pid = cpu_to_le32(id->pid); - lhdr->src_nid = cpu_to_le64(id->nid); - } - - conn->uc_rx_state = UC_RX_PARSE; - usocklnd_conn_addref(conn); /* ++ref while parsing */ - - rc = lnet_parse(conn->uc_peer->up_ni, - &conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr, - conn->uc_peerid.nid, conn, 0); - - if (rc < 0) { - /* I just received garbage: 
give up on this conn */ - conn->uc_errored = 1; - usocklnd_conn_decref(conn); - return -EPROTO; - } - - /* Race with usocklnd_recv() is possible */ - pthread_mutex_lock(&conn->uc_lock); - LASSERT (conn->uc_rx_state == UC_RX_PARSE || - conn->uc_rx_state == UC_RX_LNET_PAYLOAD); - - /* check whether usocklnd_recv() got called */ - if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) - *cont_flag = 1; - pthread_mutex_unlock(&conn->uc_lock); - break; - - case UC_RX_PARSE: - LBUG(); /* it's error to be here, because this special - * case is handled by caller */ - break; - - case UC_RX_PARSE_WAIT: - LBUG(); /* it's error to be here, because the conn - * shouldn't wait for POLLIN event in this - * state */ - break; - - case UC_RX_LNET_PAYLOAD: - /* payload all received */ - - lnet_finalize(conn->uc_peer->up_ni, conn->uc_rx_lnetmsg, 0); - - cookie = conn->uc_rx_msg.ksm_zc_req_cookie; - if (cookie != 0) - rc = usocklnd_handle_zc_req(conn->uc_peer, cookie); - - if (rc != 0) { - /* change state not to finalize twice */ - conn->uc_rx_state = UC_RX_KSM_HEADER; - return -EPROTO; - } - - /* Fall through */ - - case UC_RX_SKIPPING: - if (conn->uc_rx_nob_left != 0) { - usocklnd_rx_skipping_state_transition(conn); - *cont_flag = 1; - } else { - usocklnd_rx_ksmhdr_state_transition(conn); - rc = 1; /* whole packet is read */ - } - - break; - - default: - LBUG(); /* unknown state */ - } - - return rc; -} - -/* Handle incoming ZC request from sender. 
- * NB: it's called only from read_handler, so we're sure that - * the conn cannot become zombie in the middle of processing */ -int -usocklnd_handle_zc_req(usock_peer_t *peer, __u64 cookie) -{ - usock_conn_t *conn; - usock_zc_ack_t *zc_ack; - int type; - int rc; - int dummy; - - LIBCFS_ALLOC (zc_ack, sizeof(*zc_ack)); - if (zc_ack == NULL) - return -ENOMEM; - zc_ack->zc_cookie = cookie; - - /* Let's assume that CONTROL is the best type for zcack, - * but userspace clients don't use typed connections */ - if (the_lnet.ln_pid & LNET_PID_USERFLAG) - type = SOCKLND_CONN_ANY; - else - type = SOCKLND_CONN_CONTROL; - - rc = usocklnd_find_or_create_conn(peer, type, &conn, NULL, zc_ack, - &dummy); - if (rc != 0) { - LIBCFS_FREE (zc_ack, sizeof(*zc_ack)); - return rc; - } - usocklnd_conn_decref(conn); - - return 0; -} - -/* Switch on rx_state. - * Return 0 on success, else return <0 - * Always set cont_flag: 1 if we're ready to continue reading, else 0 - */ -int -usocklnd_read_hello(usock_conn_t *conn, int *cont_flag) -{ - int rc = 0; - ksock_hello_msg_t *hello = conn->uc_rx_hello; - - *cont_flag = 0; - - /* smth. 
new emerged in hello - let's process it */ - switch (conn->uc_rx_state) { - case UC_RX_HELLO_MAGIC: - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->uc_flip = 0; - else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC)) - conn->uc_flip = 1; - else - return -EPROTO; - - usocklnd_rx_helloversion_state_transition(conn); - *cont_flag = 1; - break; - - case UC_RX_HELLO_VERSION: - if ((!conn->uc_flip && - (hello->kshm_version != KSOCK_PROTO_V2)) || - (conn->uc_flip && - (hello->kshm_version != __swab32(KSOCK_PROTO_V2)))) - return -EPROTO; - - usocklnd_rx_hellobody_state_transition(conn); - *cont_flag = 1; - break; - - case UC_RX_HELLO_BODY: - if (conn->uc_flip) { - ksock_hello_msg_t *hello = conn->uc_rx_hello; - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - __swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } - - if (conn->uc_rx_hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u port %d\n", - conn->uc_rx_hello->kshm_nips, - HIPQUAD(conn->uc_peer_ip), conn->uc_peer_port); - return -EPROTO; - } - - if (conn->uc_rx_hello->kshm_nips) { - usocklnd_rx_helloIPs_state_transition(conn); - *cont_flag = 1; - break; - } - /* fall through */ - - case UC_RX_HELLO_IPS: - if (conn->uc_activeflag == 1) /* active conn */ - rc = usocklnd_activeconn_hellorecv(conn); - else /* passive conn */ - rc = usocklnd_passiveconn_hellorecv(conn); - - break; - - default: - LBUG(); /* unknown state */ - } - - return rc; -} - -/* All actions that we need after receiving hello on active conn: - * 1) Schedule removing if we're zombie - * 2) Restart active conn if we lost the race - * 3) Else: update RX part to receive KSM header - */ -int -usocklnd_activeconn_hellorecv(usock_conn_t *conn) -{ - int rc = 0; - ksock_hello_msg_t *hello = conn->uc_rx_hello; - usock_peer_t *peer = 
conn->uc_peer; - - /* Active conn with peer==NULL is zombie. - * Don't try to link it to peer because the conn - * has already had a chance to proceed at the beginning */ - if (peer == NULL) { - LASSERT(list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list)); - - usocklnd_conn_kill(conn); - return 0; - } - - peer->up_last_alive = cfs_time_current(); - - /* peer says that we lost the race */ - if (hello->kshm_ctype == SOCKLND_CONN_NONE) { - /* Start new active conn, relink txs and zc_acks from - * the conn to new conn, schedule removing the conn. - * Actually, we're expecting that a passive conn will - * make us zombie soon and take care of our txs and - * zc_acks */ - - struct list_head tx_list, zcack_list; - usock_conn_t *conn2; - int idx = usocklnd_type2idx(conn->uc_type); - - CFS_INIT_LIST_HEAD (&tx_list); - CFS_INIT_LIST_HEAD (&zcack_list); - - /* Block usocklnd_send() to check peer->up_conns[idx] - * and to enqueue more txs */ - pthread_mutex_lock(&peer->up_lock); - pthread_mutex_lock(&conn->uc_lock); - - /* usocklnd_shutdown() could kill us */ - if (conn->uc_state == UC_DEAD) { - pthread_mutex_unlock(&conn->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - return 0; - } - - LASSERT (peer == conn->uc_peer); - LASSERT (peer->up_conns[idx] == conn); - - rc = usocklnd_create_active_conn(peer, conn->uc_type, &conn2); - if (rc) { - conn->uc_errored = 1; - pthread_mutex_unlock(&conn->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - return rc; - } - - usocklnd_link_conn_to_peer(conn2, peer, idx); - conn2->uc_peer = peer; - - /* unlink txs and zcack from the conn */ - list_add(&tx_list, &conn->uc_tx_list); - list_del_init(&conn->uc_tx_list); - list_add(&zcack_list, &conn->uc_zcack_list); - list_del_init(&conn->uc_zcack_list); - - /* link they to the conn2 */ - list_add(&conn2->uc_tx_list, &tx_list); - list_del_init(&tx_list); - list_add(&conn2->uc_zcack_list, &zcack_list); - list_del_init(&zcack_list); - - /* make conn zombie */ - conn->uc_peer = 
NULL; - usocklnd_peer_decref(peer); - - /* schedule conn2 for processing */ - rc = usocklnd_add_pollrequest(conn2, POLL_ADD_REQUEST, POLLOUT); - if (rc) { - peer->up_conns[idx] = NULL; - usocklnd_conn_decref(conn2); /* should destroy conn */ - } else { - usocklnd_conn_kill_locked(conn); - } - - pthread_mutex_unlock(&conn->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - usocklnd_conn_decref(conn); - - } else { /* hello->kshm_ctype != SOCKLND_CONN_NONE */ - if (conn->uc_type != usocklnd_invert_type(hello->kshm_ctype)) - return -EPROTO; - - pthread_mutex_lock(&peer->up_lock); - usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, - conn); - pthread_mutex_unlock(&peer->up_lock); - - /* safely transit to UC_READY state */ - /* rc == 0 */ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) { - usocklnd_rx_ksmhdr_state_transition(conn); - - /* POLLIN is already set because we just - * received hello, but maybe we've smth. to - * send? */ - LASSERT (conn->uc_sending == 0); - if ( !list_empty(&conn->uc_tx_list) || - !list_empty(&conn->uc_zcack_list) ) { - - conn->uc_tx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc = usocklnd_add_pollrequest(conn, - POLL_SET_REQUEST, - POLLIN | POLLOUT); - } - - if (rc == 0) - conn->uc_state = UC_READY; - } - pthread_mutex_unlock(&conn->uc_lock); - } - - return rc; -} - -/* All actions that we need after receiving hello on passive conn: - * 1) Stash peer's nid, pid, incarnation and conn type - * 2) Cope with easy case: conn[idx] is empty - just save conn there - * 3) Resolve race: - * a) if our nid is higher - reply with CONN_NONE and make us zombie - * b) if peer's nid is higher - postpone race resolution till - * READY state - * 4) Anyhow, send reply hello -*/ -int -usocklnd_passiveconn_hellorecv(usock_conn_t *conn) -{ - ksock_hello_msg_t *hello = conn->uc_rx_hello; - int type; - int idx; - int rc; - usock_peer_t *peer; - lnet_ni_t *ni = conn->uc_ni; - __u32 peer_ip = 
conn->uc_peer_ip; - __u16 peer_port = conn->uc_peer_port; - - /* don't know parent peer yet and not zombie */ - LASSERT (conn->uc_peer == NULL && - ni != NULL); - - /* don't know peer's nid and incarnation yet */ - if (peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - /* do not trust liblustre clients */ - conn->uc_peerid.pid = peer_port | LNET_PID_USERFLAG; - conn->uc_peerid.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - peer_ip); - if (hello->kshm_ctype != SOCKLND_CONN_ANY) { - lnet_ni_decref(ni); - conn->uc_ni = NULL; - CERROR("Refusing to accept connection of type=%d from " - "userspace process %u.%u.%u.%u:%d\n", hello->kshm_ctype, - HIPQUAD(peer_ip), peer_port); - return -EINVAL; - } - } else { - conn->uc_peerid.pid = hello->kshm_src_pid; - conn->uc_peerid.nid = hello->kshm_src_nid; - } - conn->uc_type = type = usocklnd_invert_type(hello->kshm_ctype); - - rc = usocklnd_find_or_create_peer(ni, conn->uc_peerid, &peer); - if (rc) { - lnet_ni_decref(ni); - conn->uc_ni = NULL; - return rc; - } - - peer->up_last_alive = cfs_time_current(); - - idx = usocklnd_type2idx(conn->uc_type); - - /* safely check whether we're first */ - pthread_mutex_lock(&peer->up_lock); - - usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, NULL); - - if (peer->up_conns[idx] == NULL) { - peer->up_last_alive = cfs_time_current(); - conn->uc_peer = peer; - conn->uc_ni = NULL; - usocklnd_link_conn_to_peer(conn, peer, idx); - usocklnd_conn_addref(conn); - } else { - usocklnd_peer_decref(peer); - - /* Resolve race in favour of higher NID */ - if (conn->uc_peerid.nid < conn->uc_ni->ni_nid) { - /* make us zombie */ - conn->uc_ni = NULL; - type = SOCKLND_CONN_NONE; - } - - /* if conn->uc_peerid.nid > conn->uc_ni->ni_nid, - * postpone race resolution till READY state - * (hopefully that conn[idx] will die because of - * incoming hello of CONN_NONE type) */ - } - pthread_mutex_unlock(&peer->up_lock); - - /* allocate and initialize fake tx with hello */ - conn->uc_tx_hello = 
usocklnd_create_hello_tx(ni, type, - conn->uc_peerid.nid); - if (conn->uc_ni == NULL) - lnet_ni_decref(ni); - - if (conn->uc_tx_hello == NULL) - return -ENOMEM; - - /* rc == 0 */ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state == UC_DEAD) - goto passive_hellorecv_done; - - conn->uc_state = UC_SENDING_HELLO; - conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLOUT); - - passive_hellorecv_done: - pthread_mutex_unlock(&conn->uc_lock); - return rc; -} - -int -usocklnd_write_handler(usock_conn_t *conn) -{ - usock_tx_t *tx; - int ret; - int rc = 0; - int state; - usock_peer_t *peer; - lnet_ni_t *ni; - - pthread_mutex_lock(&conn->uc_lock); /* like membar */ - state = conn->uc_state; - pthread_mutex_unlock(&conn->uc_lock); - - switch (state) { - case UC_CONNECTING: - /* hello_tx has already been initialized - * in usocklnd_create_active_conn() */ - usocklnd_conn_new_state(conn, UC_SENDING_HELLO); - /* fall through */ - - case UC_SENDING_HELLO: - rc = usocklnd_send_tx(conn, conn->uc_tx_hello); - if (rc <= 0) /* error or partial send or connection closed */ - break; - - /* tx with hello was sent successfully */ - usocklnd_destroy_tx(NULL, conn->uc_tx_hello); - conn->uc_tx_hello = NULL; - - if (conn->uc_activeflag == 1) /* active conn */ - rc = usocklnd_activeconn_hellosent(conn); - else /* passive conn */ - rc = usocklnd_passiveconn_hellosent(conn); - - break; - - case UC_READY: - pthread_mutex_lock(&conn->uc_lock); - - peer = conn->uc_peer; - LASSERT (peer != NULL); - ni = peer->up_ni; - - if (list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list)) { - LASSERT(usock_tuns.ut_fair_limit > 1); - pthread_mutex_unlock(&conn->uc_lock); - return 0; - } - - tx = usocklnd_try_piggyback(&conn->uc_tx_list, - &conn->uc_zcack_list); - if (tx != NULL) - conn->uc_sending = 1; - else - rc = -ENOMEM; - - pthread_mutex_unlock(&conn->uc_lock); - - if (rc) - break; - - rc = 
usocklnd_send_tx(conn, tx); - if (rc == 0) { /* partial send or connection closed */ - pthread_mutex_lock(&conn->uc_lock); - list_add(&tx->tx_list, &conn->uc_tx_list); - conn->uc_sending = 0; - pthread_mutex_unlock(&conn->uc_lock); - break; - } - if (rc < 0) { /* real error */ - usocklnd_destroy_tx(ni, tx); - break; - } - - /* rc == 1: tx was sent completely */ - usocklnd_destroy_tx(ni, tx); - - pthread_mutex_lock(&conn->uc_lock); - conn->uc_sending = 0; - if (conn->uc_state != UC_DEAD && - list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list)) { - conn->uc_tx_flag = 0; - ret = usocklnd_add_pollrequest(conn, - POLL_TX_SET_REQUEST, 0); - if (ret) - rc = ret; - } - pthread_mutex_unlock(&conn->uc_lock); - - break; - - case UC_DEAD: - break; - - default: - LBUG(); - } - - if (rc < 0) - usocklnd_conn_kill(conn); - - return rc; -} - -/* Return the first tx from tx_list with piggybacked zc_ack - * from zcack_list when possible. If tx_list is empty, return - * brand new noop tx for zc_ack from zcack_list. 
Return NULL - * if an error happened */ -usock_tx_t * -usocklnd_try_piggyback(struct list_head *tx_list_p, - struct list_head *zcack_list_p) -{ - usock_tx_t *tx; - usock_zc_ack_t *zc_ack; - - /* assign tx and zc_ack */ - if (list_empty(tx_list_p)) - tx = NULL; - else { - tx = list_entry(tx_list_p->next, usock_tx_t, tx_list); - list_del(&tx->tx_list); - - /* already piggybacked or partially send */ - if (tx->tx_msg.ksm_zc_ack_cookie || - tx->tx_resid != tx->tx_nob) - return tx; - } - - if (list_empty(zcack_list_p)) { - /* nothing to piggyback */ - return tx; - } else { - zc_ack = list_entry(zcack_list_p->next, - usock_zc_ack_t, zc_list); - list_del(&zc_ack->zc_list); - } - - if (tx != NULL) - /* piggyback the zc-ack cookie */ - tx->tx_msg.ksm_zc_ack_cookie = zc_ack->zc_cookie; - else - /* cannot piggyback, need noop */ - tx = usocklnd_create_noop_tx(zc_ack->zc_cookie); - - LIBCFS_FREE (zc_ack, sizeof(*zc_ack)); - return tx; -} - -/* All actions that we need after sending hello on active conn: - * 1) update RX iov to receive hello - * 2) state transition to UC_RECEIVING_HELLO - * 3) notify poll_thread that we're waiting for incoming hello */ -int -usocklnd_activeconn_hellosent(usock_conn_t *conn) -{ - int rc = 0; - - pthread_mutex_lock(&conn->uc_lock); - - if (conn->uc_state != UC_DEAD) { - usocklnd_rx_hellomagic_state_transition(conn); - conn->uc_state = UC_RECEIVING_HELLO; - conn->uc_tx_flag = 0; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLIN); - } - - pthread_mutex_unlock(&conn->uc_lock); - - return rc; -} - -/* All actions that we need after sending hello on passive conn: - * 1) Cope with 1st easy case: conn is already linked to a peer - * 2) Cope with 2nd easy case: remove zombie conn - * 3) Resolve race: - * a) find the peer - * b) link the conn to the peer if conn[idx] is empty - * c) if the conn[idx] isn't empty and is in READY state, - * remove the conn as duplicated - * d) if the conn[idx] isn't empty and isn't in READY state, - * override 
conn[idx] with the conn - */ -int -usocklnd_passiveconn_hellosent(usock_conn_t *conn) -{ - usock_conn_t *conn2; - usock_peer_t *peer; - struct list_head tx_list; - struct list_head zcack_list; - int idx; - int rc = 0; - - /* almost nothing to do if conn is already linked to peer hash table */ - if (conn->uc_peer != NULL) - goto passive_hellosent_done; - - /* conn->uc_peer == NULL, so the conn isn't accessible via - * peer hash list, so nobody can touch the conn but us */ - - if (conn->uc_ni == NULL) /* remove zombie conn */ - goto passive_hellosent_connkill; - - /* all code below is race resolution, because normally - * passive conn is linked to peer just after receiving hello */ - CFS_INIT_LIST_HEAD (&tx_list); - CFS_INIT_LIST_HEAD (&zcack_list); - - /* conn is passive and isn't linked to any peer, - so its tx and zc_ack lists have to be empty */ - LASSERT (list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list) && - conn->uc_sending == 0); - - rc = usocklnd_find_or_create_peer(conn->uc_ni, conn->uc_peerid, &peer); - if (rc) - return rc; - - idx = usocklnd_type2idx(conn->uc_type); - - /* try to link conn to peer */ - pthread_mutex_lock(&peer->up_lock); - if (peer->up_conns[idx] == NULL) { - usocklnd_link_conn_to_peer(conn, peer, idx); - usocklnd_conn_addref(conn); - conn->uc_peer = peer; - usocklnd_peer_addref(peer); - } else { - conn2 = peer->up_conns[idx]; - pthread_mutex_lock(&conn2->uc_lock); - - if (conn2->uc_state == UC_READY) { - /* conn2 is in READY state, so conn is "duplicated" */ - pthread_mutex_unlock(&conn2->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - usocklnd_peer_decref(peer); - goto passive_hellosent_connkill; - } - - /* uc_state != UC_READY => switch conn and conn2 */ - /* Relink txs and zc_acks from conn2 to conn. 
- * We're sure that nobody but us can access to conn, - * nevertheless we use mutex (if we're wrong yet, - * deadlock is easy to see that corrupted list */ - list_add(&tx_list, &conn2->uc_tx_list); - list_del_init(&conn2->uc_tx_list); - list_add(&zcack_list, &conn2->uc_zcack_list); - list_del_init(&conn2->uc_zcack_list); - - pthread_mutex_lock(&conn->uc_lock); - list_add_tail(&conn->uc_tx_list, &tx_list); - list_del_init(&tx_list); - list_add_tail(&conn->uc_zcack_list, &zcack_list); - list_del_init(&zcack_list); - conn->uc_peer = peer; - pthread_mutex_unlock(&conn->uc_lock); - - conn2->uc_peer = NULL; /* make conn2 zombie */ - pthread_mutex_unlock(&conn2->uc_lock); - usocklnd_conn_decref(conn2); - - usocklnd_link_conn_to_peer(conn, peer, idx); - usocklnd_conn_addref(conn); - conn->uc_peer = peer; - } - - lnet_ni_decref(conn->uc_ni); - conn->uc_ni = NULL; - pthread_mutex_unlock(&peer->up_lock); - usocklnd_peer_decref(peer); - - passive_hellosent_done: - /* safely transit to UC_READY state */ - /* rc == 0 */ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) { - usocklnd_rx_ksmhdr_state_transition(conn); - - /* we're ready to recive incoming packets and maybe - already have smth. to transmit */ - LASSERT (conn->uc_sending == 0); - if ( list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list) ) { - conn->uc_tx_flag = 0; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, - POLLIN); - } else { - conn->uc_tx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, - POLLIN | POLLOUT); - } - - if (rc == 0) - conn->uc_state = UC_READY; - } - pthread_mutex_unlock(&conn->uc_lock); - return rc; - - passive_hellosent_connkill: - usocklnd_conn_kill(conn); - return 0; -} - -/* Send as much tx data as possible. - * Returns 0 or 1 on succsess, <0 if fatal error. - * 0 means partial send or non-fatal error, 1 - complete. 
- * Rely on libcfs_sock_writev() for differentiating fatal and - * non-fatal errors. An error should be considered as non-fatal if: - * 1) it still makes sense to continue reading && - * 2) anyway, poll() will set up POLLHUP|POLLERR flags */ -int -usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx) -{ - struct iovec *iov; - int nob; - int fd = conn->uc_fd; - cfs_time_t t; - - LASSERT (tx->tx_resid != 0); - - do { - usock_peer_t *peer = conn->uc_peer; - - LASSERT (tx->tx_niov > 0); - - nob = libcfs_sock_writev(fd, tx->tx_iov, tx->tx_niov); - if (nob < 0) - conn->uc_errored = 1; - if (nob <= 0) /* write queue is flow-controlled or error */ - return nob; - - LASSERT (nob <= tx->tx_resid); - tx->tx_resid -= nob; - t = cfs_time_current(); - conn->uc_tx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout)); - - if(peer != NULL) - peer->up_last_alive = t; - - /* "consume" iov */ - iov = tx->tx_iov; - do { - LASSERT (tx->tx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); - iov->iov_len -= nob; - break; - } - - nob -= iov->iov_len; - tx->tx_iov = ++iov; - tx->tx_niov--; - } while (nob != 0); - - } while (tx->tx_resid != 0); - - return 1; /* send complete */ -} - -/* Read from wire as much data as possible. - * Returns 0 or 1 on succsess, <0 if error or EOF. 
- * 0 means partial read, 1 - complete */ -int -usocklnd_read_data(usock_conn_t *conn) -{ - struct iovec *iov; - int nob; - cfs_time_t t; - - LASSERT (conn->uc_rx_nob_wanted != 0); - - do { - usock_peer_t *peer = conn->uc_peer; - - LASSERT (conn->uc_rx_niov > 0); - - nob = libcfs_sock_readv(conn->uc_fd, conn->uc_rx_iov, conn->uc_rx_niov); - if (nob <= 0) {/* read nothing or error */ - conn->uc_errored = 1; - return nob; - } - - LASSERT (nob <= conn->uc_rx_nob_wanted); - conn->uc_rx_nob_wanted -= nob; - conn->uc_rx_nob_left -= nob; - t = cfs_time_current(); - conn->uc_rx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout)); - - if(peer != NULL) - peer->up_last_alive = t; - - /* "consume" iov */ - iov = conn->uc_rx_iov; - do { - LASSERT (conn->uc_rx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); - iov->iov_len -= nob; - break; - } - - nob -= iov->iov_len; - conn->uc_rx_iov = ++iov; - conn->uc_rx_niov--; - } while (nob != 0); - - } while (conn->uc_rx_nob_wanted != 0); - - return 1; /* read complete */ -} diff --git a/lnet/ulnds/socklnd/poll.c b/lnet/ulnds/socklnd/poll.c deleted file mode 100644 index ba08deeb5d116bc5be8cf2b25b80ce1dec2a50b7..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/poll.c +++ /dev/null @@ -1,483 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Maxim Patlasov <maxim@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. 
- * - */ - -#include "usocklnd.h" -#include <unistd.h> -#include <sys/syscall.h> - -void -usocklnd_process_stale_list(usock_pollthread_t *pt_data) -{ - while (!list_empty(&pt_data->upt_stale_list)) { - usock_conn_t *conn; - conn = list_entry(pt_data->upt_stale_list.next, - usock_conn_t, uc_stale_list); - - list_del(&conn->uc_stale_list); - - usocklnd_tear_peer_conn(conn); - usocklnd_conn_decref(conn); /* -1 for idx2conn[idx] or pr */ - } -} - -int -usocklnd_poll_thread(void *arg) -{ - int rc = 0; - usock_pollthread_t *pt_data = (usock_pollthread_t *)arg; - cfs_time_t current_time; - cfs_time_t planned_time; - int idx; - int idx_start; - int idx_finish; - int chunk; - int saved_nfds; - int extra; - int times; - - /* mask signals to avoid SIGPIPE, etc */ - sigset_t sigs; - sigfillset (&sigs); - pthread_sigmask (SIG_SETMASK, &sigs, 0); - - LASSERT(pt_data != NULL); - - planned_time = cfs_time_shift(usock_tuns.ut_poll_timeout); - chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds); - saved_nfds = pt_data->upt_nfds; - idx_start = 1; - - /* Main loop */ - while (usock_data.ud_shutdown == 0) { - rc = 0; - - /* Process all enqueued poll requests */ - pthread_mutex_lock(&pt_data->upt_pollrequests_lock); - while (!list_empty(&pt_data->upt_pollrequests)) { - usock_pollrequest_t *pr; - pr = list_entry(pt_data->upt_pollrequests.next, - usock_pollrequest_t, upr_list); - - list_del(&pr->upr_list); - rc = usocklnd_process_pollrequest(pr, pt_data); - if (rc) - break; - } - pthread_mutex_unlock(&pt_data->upt_pollrequests_lock); - - if (rc) - break; - - /* Delete conns orphaned due to POLL_DEL_REQUESTs */ - usocklnd_process_stale_list(pt_data); - - /* Actual polling for events */ - rc = poll(pt_data->upt_pollfd, - pt_data->upt_nfds, - usock_tuns.ut_poll_timeout * 1000); - - if (rc < 0) { - CERROR("Cannot poll(2): errno=%d\n", errno); - break; - } - - if (rc > 0) - usocklnd_execute_handlers(pt_data); - - current_time = cfs_time_current(); - - if (pt_data->upt_nfds < 2 || - 
cfs_time_before(current_time, planned_time)) - continue; - - /* catch up growing pollfd[] */ - if (pt_data->upt_nfds > saved_nfds) { - extra = pt_data->upt_nfds - saved_nfds; - saved_nfds = pt_data->upt_nfds; - } else { - extra = 0; - } - - times = cfs_duration_sec(cfs_time_sub(current_time, planned_time)) + 1; - idx_finish = MIN(idx_start + chunk*times + extra, pt_data->upt_nfds); - - for (idx = idx_start; idx < idx_finish; idx++) { - usock_conn_t *conn = pt_data->upt_idx2conn[idx]; - pthread_mutex_lock(&conn->uc_lock); - if (usocklnd_conn_timed_out(conn, current_time) && - conn->uc_state != UC_DEAD) { - conn->uc_errored = 1; - usocklnd_conn_kill_locked(conn); - } - pthread_mutex_unlock(&conn->uc_lock); - } - - if (idx_finish == pt_data->upt_nfds) { - chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds); - saved_nfds = pt_data->upt_nfds; - idx_start = 1; - } - else { - idx_start = idx_finish; - } - - planned_time = cfs_time_add(current_time, - cfs_time_seconds(usock_tuns.ut_poll_timeout)); - } - - /* All conns should be deleted by POLL_DEL_REQUESTs while shutdown */ - LASSERT (rc != 0 || pt_data->upt_nfds == 1); - - if (rc) { - pthread_mutex_lock(&pt_data->upt_pollrequests_lock); - - /* Block new poll requests to be enqueued */ - pt_data->upt_errno = rc; - - while (!list_empty(&pt_data->upt_pollrequests)) { - usock_pollrequest_t *pr; - pr = list_entry(pt_data->upt_pollrequests.next, - usock_pollrequest_t, upr_list); - - list_del(&pr->upr_list); - - if (pr->upr_type == POLL_ADD_REQUEST) { - close(pr->upr_conn->uc_fd); - list_add_tail(&pr->upr_conn->uc_stale_list, - &pt_data->upt_stale_list); - } else { - usocklnd_conn_decref(pr->upr_conn); - } - - LIBCFS_FREE (pr, sizeof(*pr)); - } - pthread_mutex_unlock(&pt_data->upt_pollrequests_lock); - - usocklnd_process_stale_list(pt_data); - - for (idx = 1; idx < pt_data->upt_nfds; idx++) { - usock_conn_t *conn = pt_data->upt_idx2conn[idx]; - LASSERT(conn != NULL); - close(conn->uc_fd); - usocklnd_tear_peer_conn(conn); - 
usocklnd_conn_decref(conn); - } - } - - /* unblock usocklnd_shutdown() */ - cfs_complete(&pt_data->upt_completion); - - return 0; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value) -{ - int pt_idx = conn->uc_pt_idx; - usock_pollthread_t *pt = &usock_data.ud_pollthreads[pt_idx]; - usock_pollrequest_t *pr; - - LIBCFS_ALLOC(pr, sizeof(*pr)); - if (pr == NULL) { - CERROR ("Cannot allocate poll request\n"); - return -ENOMEM; - } - - pr->upr_conn = conn; - pr->upr_type = type; - pr->upr_value = value; - - usocklnd_conn_addref(conn); /* +1 for poll request */ - - pthread_mutex_lock(&pt->upt_pollrequests_lock); - - if (pt->upt_errno) { /* very rare case: errored poll thread */ - int rc = pt->upt_errno; - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - usocklnd_conn_decref(conn); - LIBCFS_FREE(pr, sizeof(*pr)); - return rc; - } - - list_add_tail(&pr->upr_list, &pt->upt_pollrequests); - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - return 0; -} - -void -usocklnd_add_killrequest(usock_conn_t *conn) -{ - int pt_idx = conn->uc_pt_idx; - usock_pollthread_t *pt = &usock_data.ud_pollthreads[pt_idx]; - usock_pollrequest_t *pr = conn->uc_preq; - - /* Use preallocated poll request because there is no good - * workaround for ENOMEM error while killing connection */ - if (pr) { - pr->upr_conn = conn; - pr->upr_type = POLL_DEL_REQUEST; - pr->upr_value = 0; - - usocklnd_conn_addref(conn); /* +1 for poll request */ - - pthread_mutex_lock(&pt->upt_pollrequests_lock); - - if (pt->upt_errno) { /* very rare case: errored poll thread */ - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - usocklnd_conn_decref(conn); - return; /* conn will be killed in poll thread anyway */ - } - - list_add_tail(&pr->upr_list, &pt->upt_pollrequests); - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - - conn->uc_preq = NULL; - } -} - -/* Process poll request. Update poll data. 
- * Returns 0 on success, <0 else */ -int -usocklnd_process_pollrequest(usock_pollrequest_t *pr, - usock_pollthread_t *pt_data) -{ - int type = pr->upr_type; - short value = pr->upr_value; - usock_conn_t *conn = pr->upr_conn; - int idx = 0; - struct pollfd *pollfd = pt_data->upt_pollfd; - int *fd2idx = pt_data->upt_fd2idx; - usock_conn_t **idx2conn = pt_data->upt_idx2conn; - int *skip = pt_data->upt_skip; - - LASSERT(conn != NULL); - LASSERT(conn->uc_fd >=0); - LASSERT(type == POLL_ADD_REQUEST || - conn->uc_fd < pt_data->upt_nfd2idx); - - if (type != POLL_ADD_REQUEST) { - idx = fd2idx[conn->uc_fd]; - if (idx > 0 && idx < pt_data->upt_nfds) { /* hot path */ - LASSERT(pollfd[idx].fd == conn->uc_fd); - } else { /* unlikely */ - CWARN("Very unlikely event happend: trying to" - " handle poll request of type %d but idx=%d" - " is out of range [1 ... %d]. Is shutdown" - " in progress (%d)?\n", - type, idx, pt_data->upt_nfds - 1, - usock_data.ud_shutdown); - - LIBCFS_FREE (pr, sizeof(*pr)); - usocklnd_conn_decref(conn); - return 0; - } - } - - LIBCFS_FREE (pr, sizeof(*pr)); - - switch (type) { - case POLL_ADD_REQUEST: - if (pt_data->upt_nfds >= pt_data->upt_npollfd) { - /* resize pollfd[], idx2conn[] and skip[] */ - struct pollfd *new_pollfd; - int new_npollfd = pt_data->upt_npollfd * 2; - usock_conn_t **new_idx2conn; - int *new_skip; - - new_pollfd = LIBCFS_REALLOC(pollfd, new_npollfd * - sizeof(struct pollfd)); - if (new_pollfd == NULL) - goto process_pollrequest_enomem; - pt_data->upt_pollfd = pollfd = new_pollfd; - - new_idx2conn = LIBCFS_REALLOC(idx2conn, new_npollfd * - sizeof(usock_conn_t *)); - if (new_idx2conn == NULL) - goto process_pollrequest_enomem; - pt_data->upt_idx2conn = idx2conn = new_idx2conn; - - new_skip = LIBCFS_REALLOC(skip, new_npollfd * - sizeof(int)); - if (new_skip == NULL) - goto process_pollrequest_enomem; - pt_data->upt_skip = new_skip; - - pt_data->upt_npollfd = new_npollfd; - } - - if (conn->uc_fd >= pt_data->upt_nfd2idx) { - /* resize 
fd2idx[] */ - int *new_fd2idx; - int new_nfd2idx = pt_data->upt_nfd2idx * 2; - - while (new_nfd2idx <= conn->uc_fd) - new_nfd2idx *= 2; - - new_fd2idx = LIBCFS_REALLOC(fd2idx, new_nfd2idx * - sizeof(int)); - if (new_fd2idx == NULL) - goto process_pollrequest_enomem; - - pt_data->upt_fd2idx = fd2idx = new_fd2idx; - memset(fd2idx + pt_data->upt_nfd2idx, 0, - (new_nfd2idx - pt_data->upt_nfd2idx) - * sizeof(int)); - pt_data->upt_nfd2idx = new_nfd2idx; - } - - LASSERT(fd2idx[conn->uc_fd] == 0); - - idx = pt_data->upt_nfds++; - idx2conn[idx] = conn; - fd2idx[conn->uc_fd] = idx; - - pollfd[idx].fd = conn->uc_fd; - pollfd[idx].events = value; - pollfd[idx].revents = 0; - break; - case POLL_DEL_REQUEST: - fd2idx[conn->uc_fd] = 0; /* invalidate this entry */ - - --pt_data->upt_nfds; - if (idx != pt_data->upt_nfds) { - /* shift last entry into released position */ - memcpy(&pollfd[idx], &pollfd[pt_data->upt_nfds], - sizeof(struct pollfd)); - idx2conn[idx] = idx2conn[pt_data->upt_nfds]; - fd2idx[pollfd[idx].fd] = idx; - } - - close(conn->uc_fd); - list_add_tail(&conn->uc_stale_list, &pt_data->upt_stale_list); - break; - case POLL_RX_SET_REQUEST: - pollfd[idx].events = (pollfd[idx].events & ~POLLIN) | value; - break; - case POLL_TX_SET_REQUEST: - pollfd[idx].events = (pollfd[idx].events & ~POLLOUT) | value; - break; - case POLL_SET_REQUEST: - pollfd[idx].events = value; - break; - default: - LBUG(); /* unknown type */ - } - - /* In the case of POLL_ADD_REQUEST, idx2conn[idx] takes the - * reference that poll request possesses */ - if (type != POLL_ADD_REQUEST) - usocklnd_conn_decref(conn); - - return 0; - - process_pollrequest_enomem: - usocklnd_conn_decref(conn); - return -ENOMEM; -} - -/* Loop on poll data executing handlers repeatedly until - * fair_limit is reached or all entries are exhausted */ -void -usocklnd_execute_handlers(usock_pollthread_t *pt_data) -{ - struct pollfd *pollfd = pt_data->upt_pollfd; - int nfds = pt_data->upt_nfds; - usock_conn_t **idx2conn = 
pt_data->upt_idx2conn; - int *skip = pt_data->upt_skip; - int j; - - if (pollfd[0].revents & POLLIN) - while (usocklnd_notifier_handler(pollfd[0].fd) > 0) - ; - - skip[0] = 1; /* always skip notifier fd */ - - for (j = 0; j < usock_tuns.ut_fair_limit; j++) { - int prev = 0; - int i = skip[0]; - - if (i >= nfds) /* nothing ready */ - break; - - do { - usock_conn_t *conn = idx2conn[i]; - int next; - - if (j == 0) /* first pass... */ - next = skip[i] = i+1; /* set skip chain */ - else /* later passes... */ - next = skip[i]; /* skip unready pollfds */ - - /* kill connection if it's closed by peer and - * there is no data pending for reading */ - if ((pollfd[i].revents & POLLERR) != 0 || - (pollfd[i].revents & POLLHUP) != 0) { - if ((pollfd[i].events & POLLIN) != 0 && - (pollfd[i].revents & POLLIN) == 0) - usocklnd_conn_kill(conn); - else - usocklnd_exception_handler(conn); - } - - if ((pollfd[i].revents & POLLIN) != 0 && - usocklnd_read_handler(conn) <= 0) - pollfd[i].revents &= ~POLLIN; - - if ((pollfd[i].revents & POLLOUT) != 0 && - usocklnd_write_handler(conn) <= 0) - pollfd[i].revents &= ~POLLOUT; - - if ((pollfd[i].revents & (POLLIN | POLLOUT)) == 0) - skip[prev] = next; /* skip this entry next pass */ - else - prev = i; - - i = next; - } while (i < nfds); - } -} - -int -usocklnd_calculate_chunk_size(int num) -{ - const int n = 4; - const int p = usock_tuns.ut_poll_timeout; - int chunk = num; - - /* chunk should be big enough to detect a timeout on any - * connection within (n+1)/n times the timeout interval - * if we checks every 'p' seconds 'chunk' conns */ - - if (usock_tuns.ut_timeout > n * p) - chunk = (chunk * n * p) / usock_tuns.ut_timeout; - - if (chunk == 0) - chunk = 1; - - return chunk; -} - -void -usocklnd_wakeup_pollthread(int i) -{ - usock_pollthread_t *pt = &usock_data.ud_pollthreads[i]; - int notification = 0; - int rc; - - rc = syscall(SYS_write, pt->upt_notifier_fd, ¬ification, - sizeof(notification)); - - if (rc != sizeof(notification)) - 
CERROR("Very unlikely event happend: " - "cannot write to notifier fd (rc=%d; errno=%d)\n", - rc, errno); -} diff --git a/lnet/ulnds/socklnd/usocklnd.c b/lnet/ulnds/socklnd/usocklnd.c deleted file mode 100644 index 2e1ba9b2e0cb89a23814ba216c4aa63279b8a15b..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/usocklnd.c +++ /dev/null @@ -1,546 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2007 Cluster File Systems, Inc. - * Author: Maxim Patlasov <maxim@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - */ - -#include "usocklnd.h" -#include <sys/time.h> - -lnd_t the_tcplnd = { - .lnd_type = SOCKLND, - .lnd_startup = usocklnd_startup, - .lnd_shutdown = usocklnd_shutdown, - .lnd_send = usocklnd_send, - .lnd_recv = usocklnd_recv, - .lnd_accept = usocklnd_accept, -}; - -usock_data_t usock_data; -usock_tunables_t usock_tuns = { - .ut_timeout = 50, - .ut_poll_timeout = 1, - .ut_fair_limit = 1, - .ut_npollthreads = 0, - .ut_min_bulk = 1<<10, - .ut_txcredits = 256, - .ut_peertxcredits = 8, - .ut_socknagle = 0, - .ut_sockbufsiz = 0, -}; - -#define MAX_REASONABLE_TIMEOUT 36000 /* 10 hours */ -#define MAX_REASONABLE_NPT 1000 - -int -usocklnd_validate_tunables() -{ - if (usock_tuns.ut_timeout <= 0 || - usock_tuns.ut_timeout > MAX_REASONABLE_TIMEOUT) { - CERROR("USOCK_TIMEOUT: %d is out of reasonable limits\n", - usock_tuns.ut_timeout); - return -1; - } - - if (usock_tuns.ut_poll_timeout <= 0 || - usock_tuns.ut_poll_timeout > MAX_REASONABLE_TIMEOUT) { - CERROR("USOCK_POLL_TIMEOUT: %d is out of reasonable limits\n", - usock_tuns.ut_poll_timeout); - return -1; - } - - if (usock_tuns.ut_fair_limit <= 0) { - CERROR("Invalid USOCK_FAIR_LIMIT: %d (should be >0)\n", - usock_tuns.ut_fair_limit); - return -1; - } - - if (usock_tuns.ut_npollthreads < 0 || - usock_tuns.ut_npollthreads > 
MAX_REASONABLE_NPT) { - CERROR("USOCK_NPOLLTHREADS: %d is out of reasonable limits\n", - usock_tuns.ut_npollthreads); - return -1; - } - - if (usock_tuns.ut_txcredits <= 0) { - CERROR("USOCK_TXCREDITS: %d should be positive\n", - usock_tuns.ut_txcredits); - return -1; - } - - if (usock_tuns.ut_peertxcredits <= 0) { - CERROR("USOCK_PEERTXCREDITS: %d should be positive\n", - usock_tuns.ut_peertxcredits); - return -1; - } - - if (usock_tuns.ut_peertxcredits > usock_tuns.ut_txcredits) { - CERROR("USOCK_PEERTXCREDITS: %d should not be greater" - " than USOCK_TXCREDITS: %d\n", - usock_tuns.ut_peertxcredits, usock_tuns.ut_txcredits); - return -1; - } - - if (usock_tuns.ut_socknagle != 0 && - usock_tuns.ut_socknagle != 1) { - CERROR("USOCK_SOCKNAGLE: %d should be 0 or 1\n", - usock_tuns.ut_socknagle); - return -1; - } - - if (usock_tuns.ut_sockbufsiz < 0) { - CERROR("USOCK_SOCKBUFSIZ: %d should be 0 or positive\n", - usock_tuns.ut_sockbufsiz); - return -1; - } - - return 0; -} - -void -usocklnd_release_poll_states(int n) -{ - int i; - - for (i = 0; i < n; i++) { - usock_pollthread_t *pt = &usock_data.ud_pollthreads[i]; - - close(pt->upt_notifier_fd); - close(pt->upt_pollfd[0].fd); - - pthread_mutex_destroy(&pt->upt_pollrequests_lock); - cfs_fini_completion(&pt->upt_completion); - - LIBCFS_FREE (pt->upt_pollfd, - sizeof(struct pollfd) * pt->upt_npollfd); - LIBCFS_FREE (pt->upt_idx2conn, - sizeof(usock_conn_t *) * pt->upt_npollfd); - LIBCFS_FREE (pt->upt_fd2idx, - sizeof(int) * pt->upt_nfd2idx); - } -} - -int -usocklnd_update_tunables() -{ - int rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_timeout, - "USOCK_TIMEOUT"); - if (rc) - return rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_poll_timeout, - "USOCK_POLL_TIMEOUT"); - if (rc) - return rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_npollthreads, - "USOCK_NPOLLTHREADS"); - if (rc) - return rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_fair_limit, - "USOCK_FAIR_LIMIT"); - if (rc) - return rc; - - rc = 
cfs_parse_int_tunable(&usock_tuns.ut_min_bulk, - "USOCK_MIN_BULK"); - if (rc) - return rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_txcredits, - "USOCK_TXCREDITS"); - if (rc) - return rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_peertxcredits, - "USOCK_PEERTXCREDITS"); - if (rc) - return rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_socknagle, - "USOCK_SOCKNAGLE"); - if (rc) - return rc; - - rc = cfs_parse_int_tunable(&usock_tuns.ut_sockbufsiz, - "USOCK_SOCKBUFSIZ"); - if (rc) - return rc; - - if (usocklnd_validate_tunables()) - return -EINVAL; - - if (usock_tuns.ut_npollthreads == 0) { - usock_tuns.ut_npollthreads = cfs_online_cpus(); - - if (usock_tuns.ut_npollthreads <= 0) { - CERROR("Cannot find out the number of online CPUs\n"); - return -EINVAL; - } - } - - return 0; -} - - -int -usocklnd_base_startup() -{ - usock_pollthread_t *pt; - int i; - int rc; - - rc = usocklnd_update_tunables(); - if (rc) - return rc; - - usock_data.ud_npollthreads = usock_tuns.ut_npollthreads; - - LIBCFS_ALLOC (usock_data.ud_pollthreads, - usock_data.ud_npollthreads * - sizeof(usock_pollthread_t)); - if (usock_data.ud_pollthreads == NULL) - return -ENOMEM; - - /* Initialize poll thread state structures */ - for (i = 0; i < usock_data.ud_npollthreads; i++) { - int notifier[2]; - - pt = &usock_data.ud_pollthreads[i]; - - rc = -ENOMEM; - - LIBCFS_ALLOC (pt->upt_pollfd, - sizeof(struct pollfd) * UPT_START_SIZ); - if (pt->upt_pollfd == NULL) - goto base_startup_failed_0; - - LIBCFS_ALLOC (pt->upt_idx2conn, - sizeof(usock_conn_t *) * UPT_START_SIZ); - if (pt->upt_idx2conn == NULL) - goto base_startup_failed_1; - - LIBCFS_ALLOC (pt->upt_fd2idx, - sizeof(int) * UPT_START_SIZ); - if (pt->upt_fd2idx == NULL) - goto base_startup_failed_2; - - memset(pt->upt_fd2idx, 0, - sizeof(int) * UPT_START_SIZ); - - LIBCFS_ALLOC (pt->upt_skip, - sizeof(int) * UPT_START_SIZ); - if (pt->upt_skip == NULL) - goto base_startup_failed_3; - - pt->upt_npollfd = pt->upt_nfd2idx = UPT_START_SIZ; - - rc 
= libcfs_socketpair(notifier); - if (rc != 0) - goto base_startup_failed_4; - - pt->upt_notifier_fd = notifier[0]; - - pt->upt_pollfd[0].fd = notifier[1]; - pt->upt_pollfd[0].events = POLLIN; - pt->upt_pollfd[0].revents = 0; - - pt->upt_nfds = 1; - pt->upt_idx2conn[0] = NULL; - - pt->upt_errno = 0; - CFS_INIT_LIST_HEAD (&pt->upt_pollrequests); - CFS_INIT_LIST_HEAD (&pt->upt_stale_list); - pthread_mutex_init(&pt->upt_pollrequests_lock, NULL); - cfs_init_completion(&pt->upt_completion); - } - - /* Initialize peer hash list */ - for (i = 0; i < UD_PEER_HASH_SIZE; i++) - CFS_INIT_LIST_HEAD(&usock_data.ud_peers[i]); - - pthread_rwlock_init(&usock_data.ud_peers_lock, NULL); - - /* Spawn poll threads */ - for (i = 0; i < usock_data.ud_npollthreads; i++) { - rc = cfs_create_thread(usocklnd_poll_thread, - &usock_data.ud_pollthreads[i]); - if (rc) { - usocklnd_base_shutdown(i); - return rc; - } - } - - usock_data.ud_state = UD_STATE_INITIALIZED; - - return 0; - - base_startup_failed_4: - LIBCFS_FREE (pt->upt_skip, sizeof(int) * UPT_START_SIZ); - base_startup_failed_3: - LIBCFS_FREE (pt->upt_fd2idx, sizeof(int) * UPT_START_SIZ); - base_startup_failed_2: - LIBCFS_FREE (pt->upt_idx2conn, sizeof(usock_conn_t *) * UPT_START_SIZ); - base_startup_failed_1: - LIBCFS_FREE (pt->upt_pollfd, sizeof(struct pollfd) * UPT_START_SIZ); - base_startup_failed_0: - LASSERT(rc != 0); - usocklnd_release_poll_states(i); - LIBCFS_FREE (usock_data.ud_pollthreads, - usock_data.ud_npollthreads * - sizeof(usock_pollthread_t)); - return rc; -} - -void -usocklnd_base_shutdown(int n) -{ - int i; - - usock_data.ud_shutdown = 1; - for (i = 0; i < n; i++) { - usock_pollthread_t *pt = &usock_data.ud_pollthreads[i]; - usocklnd_wakeup_pollthread(i); - cfs_wait_for_completion(&pt->upt_completion); - } - - pthread_rwlock_destroy(&usock_data.ud_peers_lock); - - usocklnd_release_poll_states(usock_data.ud_npollthreads); - - LIBCFS_FREE (usock_data.ud_pollthreads, - usock_data.ud_npollthreads * - 
sizeof(usock_pollthread_t)); - - usock_data.ud_state = UD_STATE_INIT_NOTHING; -} - -__u64 -usocklnd_new_incarnation() -{ - struct timeval tv; - int rc = gettimeofday(&tv, NULL); - LASSERT (rc == 0); - return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; -} - -static int -usocklnd_assign_ni_nid(lnet_ni_t *ni) -{ - int rc; - int up; - __u32 ipaddr; - - /* Find correct IP-address and update ni_nid with it. - * Two cases are supported: - * 1) no explicit interfaces are defined. NID will be assigned to - * first non-lo interface that is up; - * 2) exactly one explicit interface is defined. For example, - * LNET_NETWORKS='tcp(eth0)' */ - - if (ni->ni_interfaces[0] == NULL) { - char **names; - int i, n; - - n = libcfs_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Can't enumerate interfaces: %d\n", n); - return -1; - } - - for (i = 0; i < n; i++) { - - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = libcfs_ipif_query(names[i], &up, &ipaddr); - if (rc != 0) { - CWARN("Can't get interface %s info: %d\n", - names[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s (down)\n", - names[i]); - continue; - } - - break; /* one address is quite enough */ - } - - libcfs_ipif_free_enumeration(names, n); - - if (i >= n) { - CERROR("Can't find any usable interfaces\n"); - return -1; - } - - CDEBUG(D_NET, "No explicit interfaces defined. " - "%u.%u.%u.%u used\n", HIPQUAD(ipaddr)); - } else { - if (ni->ni_interfaces[1] != NULL) { - CERROR("only one explicit interface is allowed\n"); - return -1; - } - - rc = libcfs_ipif_query(ni->ni_interfaces[0], &up, &ipaddr); - if (rc != 0) { - CERROR("Can't get interface %s info: %d\n", - ni->ni_interfaces[0], rc); - return -1; - } - - if (!up) { - CERROR("Explicit interface defined: %s but is down\n", - ni->ni_interfaces[0]); - return -1; - } - - CDEBUG(D_NET, "Explicit interface defined: %s. 
" - "%u.%u.%u.%u used\n", - ni->ni_interfaces[0], HIPQUAD(ipaddr)); - - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ipaddr); - - return 0; -} - -int -usocklnd_startup(lnet_ni_t *ni) -{ - int rc; - usock_net_t *net; - - if (usock_data.ud_state == UD_STATE_INIT_NOTHING) { - rc = usocklnd_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - if (net == NULL) - goto startup_failed_0; - - memset(net, 0, sizeof(*net)); - net->un_incarnation = usocklnd_new_incarnation(); - pthread_mutex_init(&net->un_lock, NULL); - pthread_cond_init(&net->un_cond, NULL); - - ni->ni_data = net; - - if (!(the_lnet.ln_pid & LNET_PID_USERFLAG)) { - rc = usocklnd_assign_ni_nid(ni); - if (rc != 0) - goto startup_failed_1; - } - - LASSERT (ni->ni_lnd == &the_tcplnd); - - ni->ni_maxtxcredits = usock_tuns.ut_txcredits; - ni->ni_peertxcredits = usock_tuns.ut_peertxcredits; - - usock_data.ud_nets_count++; - return 0; - - startup_failed_1: - pthread_mutex_destroy(&net->un_lock); - pthread_cond_destroy(&net->un_cond); - LIBCFS_FREE(net, sizeof(*net)); - startup_failed_0: - if (usock_data.ud_nets_count == 0) - usocklnd_base_shutdown(usock_data.ud_npollthreads); - - return -ENETDOWN; -} - -void -usocklnd_shutdown(lnet_ni_t *ni) -{ - usock_net_t *net = ni->ni_data; - - net->un_shutdown = 1; - - usocklnd_del_all_peers(ni); - - /* Wait for all peer state to clean up */ - pthread_mutex_lock(&net->un_lock); - while (net->un_peercount != 0) - pthread_cond_wait(&net->un_cond, &net->un_lock); - pthread_mutex_unlock(&net->un_lock); - - /* Release usock_net_t structure */ - pthread_mutex_destroy(&net->un_lock); - pthread_cond_destroy(&net->un_cond); - LIBCFS_FREE(net, sizeof(*net)); - - usock_data.ud_nets_count--; - if (usock_data.ud_nets_count == 0) - usocklnd_base_shutdown(usock_data.ud_npollthreads); -} - -void -usocklnd_del_all_peers(lnet_ni_t *ni) -{ - struct list_head *ptmp; - struct list_head *pnxt; - usock_peer_t *peer; - int i; - - 
pthread_rwlock_wrlock(&usock_data.ud_peers_lock); - - for (i = 0; i < UD_PEER_HASH_SIZE; i++) { - list_for_each_safe (ptmp, pnxt, &usock_data.ud_peers[i]) { - peer = list_entry (ptmp, usock_peer_t, up_list); - - if (peer->up_ni != ni) - continue; - - usocklnd_del_peer_and_conns(peer); - } - } - - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - - /* wakeup all threads */ - for (i = 0; i < usock_data.ud_npollthreads; i++) - usocklnd_wakeup_pollthread(i); -} - -void -usocklnd_del_peer_and_conns(usock_peer_t *peer) -{ - /* peer cannot disappear because it's still in hash list */ - - pthread_mutex_lock(&peer->up_lock); - /* content of conn[] array cannot change now */ - usocklnd_del_conns_locked(peer); - pthread_mutex_unlock(&peer->up_lock); - - /* peer hash list is still protected by the caller */ - list_del(&peer->up_list); - - usocklnd_peer_decref(peer); /* peer isn't in hash list anymore */ -} - -void -usocklnd_del_conns_locked(usock_peer_t *peer) -{ - int i; - - for (i=0; i < N_CONN_TYPES; i++) { - usock_conn_t *conn = peer->up_conns[i]; - if (conn != NULL) - usocklnd_conn_kill(conn); - } -} diff --git a/lnet/ulnds/socklnd/usocklnd.h b/lnet/ulnds/socklnd/usocklnd.h deleted file mode 100644 index f2abf9d61e1442de5dcfdd821be2f4c8fc21b53f..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/usocklnd.h +++ /dev/null @@ -1,332 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Maxim Patlasov <maxim@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. 
- * - */ -#define _GNU_SOURCE -#include <pthread.h> -#include <poll.h> -#include <lnet/lib-lnet.h> -#include <lnet/socklnd.h> - -typedef struct { - struct list_head tx_list; /* neccessary to form tx list */ - lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */ - ksock_msg_t tx_msg; /* buffer for wire header of ksock msg */ - int tx_resid; /* # of residual bytes */ - int tx_nob; /* # of packet bytes */ - int tx_size; /* size of this descriptor */ - struct iovec *tx_iov; /* points to tx_iova[i] */ - int tx_niov; /* # of packet iovec frags */ - struct iovec tx_iova[1]; /* iov for header */ -} usock_tx_t; - -struct usock_peer_s; - -typedef struct { - int uc_fd; /* socket */ - int uc_type; /* conn type */ - int uc_activeflag; /* active side of connection? */ - int uc_flip; /* is peer other endian? */ - int uc_state; /* connection state */ - struct usock_peer_s *uc_peer; /* owning peer */ - lnet_process_id_t uc_peerid; /* id of remote peer */ - int uc_pt_idx; /* index in ud_pollthreads[] of - * owning poll thread */ - lnet_ni_t *uc_ni; /* parent NI while accepting */ - struct usock_preq_s *uc_preq; /* preallocated request */ - __u32 uc_peer_ip; /* IP address of the peer */ - __u16 uc_peer_port; /* port of the peer */ - struct list_head uc_stale_list; /* orphaned connections */ - - /* Receive state */ - int uc_rx_state; /* message or hello state */ - ksock_hello_msg_t *uc_rx_hello; /* hello buffer */ - struct iovec *uc_rx_iov; /* points to uc_rx_iova[i] */ - struct iovec uc_rx_iova[LNET_MAX_IOV]; /* message frags */ - int uc_rx_niov; /* # frags */ - int uc_rx_nob_left; /* # bytes to next hdr/body */ - int uc_rx_nob_wanted; /* # of bytes actually wanted */ - void *uc_rx_lnetmsg; /* LNET message being received */ - cfs_time_t uc_rx_deadline; /* when to time out */ - int uc_rx_flag; /* deadline valid? 
*/ - ksock_msg_t uc_rx_msg; /* message buffer */ - - /* Send state */ - struct list_head uc_tx_list; /* pending txs */ - struct list_head uc_zcack_list; /* pending zc_acks */ - cfs_time_t uc_tx_deadline; /* when to time out */ - int uc_tx_flag; /* deadline valid? */ - int uc_sending; /* send op is in progress */ - usock_tx_t *uc_tx_hello; /* fake tx with hello */ - - cfs_atomic_t uc_refcount; /* # of users */ - pthread_mutex_t uc_lock; /* serialize */ - int uc_errored; /* a flag for lnet_notify() */ -} usock_conn_t; - -/* Allowable conn states are: */ -#define UC_CONNECTING 1 -#define UC_SENDING_HELLO 2 -#define UC_RECEIVING_HELLO 3 -#define UC_READY 4 -#define UC_DEAD 5 - -/* Allowable RX states are: */ -#define UC_RX_HELLO_MAGIC 1 -#define UC_RX_HELLO_VERSION 2 -#define UC_RX_HELLO_BODY 3 -#define UC_RX_HELLO_IPS 4 -#define UC_RX_KSM_HEADER 5 -#define UC_RX_LNET_HEADER 6 -#define UC_RX_PARSE 7 -#define UC_RX_PARSE_WAIT 8 -#define UC_RX_LNET_PAYLOAD 9 -#define UC_RX_SKIPPING 10 - -#define N_CONN_TYPES 3 /* CONTROL, BULK_IN and BULK_OUT */ - -typedef struct usock_peer_s { - struct list_head up_list; /* neccessary to form peer list */ - lnet_process_id_t up_peerid; /* id of remote peer */ - usock_conn_t *up_conns[N_CONN_TYPES]; /* conns that connect us - * us with the peer */ - lnet_ni_t *up_ni; /* pointer to parent NI */ - __u64 up_incarnation; /* peer's incarnation */ - int up_incrn_is_set; /* 0 if peer's incarnation - * hasn't been set so far */ - cfs_atomic_t up_refcount; /* # of users */ - pthread_mutex_t up_lock; /* serialize */ - int up_errored; /* a flag for lnet_notify() */ - cfs_time_t up_last_alive; /* when the peer was last alive */ -} usock_peer_t; - -typedef struct { - int upt_notifier_fd; /* notifier fd for writing */ - struct pollfd *upt_pollfd; /* poll fds */ - int upt_nfds; /* active poll fds */ - int upt_npollfd; /* allocated poll fds */ - usock_conn_t **upt_idx2conn; /* conns corresponding to - * upt_pollfd[idx] */ - int *upt_skip; /* skip chain 
*/ - int *upt_fd2idx; /* index into upt_pollfd[] - * by fd */ - int upt_nfd2idx; /* # of allocated elements - * of upt_fd2idx[] */ - struct list_head upt_stale_list; /* list of orphaned conns */ - struct list_head upt_pollrequests; /* list of poll requests */ - pthread_mutex_t upt_pollrequests_lock; /* serialize */ - int upt_errno; /* non-zero if errored */ - struct cfs_completion upt_completion; /* wait/signal facility for - * syncronizing shutdown */ -} usock_pollthread_t; - -/* Number of elements in upt_pollfd[], upt_idx2conn[] and upt_fd2idx[] - * at initialization time. Will be resized on demand */ -#define UPT_START_SIZ 32 - -/* # peer lists */ -#define UD_PEER_HASH_SIZE 101 - -typedef struct { - int ud_state; /* initialization state */ - int ud_npollthreads; /* # of poll threads */ - usock_pollthread_t *ud_pollthreads; /* their state */ - int ud_shutdown; /* shutdown flag */ - int ud_nets_count; /* # of instances */ - struct list_head ud_peers[UD_PEER_HASH_SIZE]; /* peer hash table */ - pthread_rwlock_t ud_peers_lock; /* serialize */ -} usock_data_t; - -extern usock_data_t usock_data; - -/* ud_state allowed values */ -#define UD_STATE_INIT_NOTHING 0 -#define UD_STATE_INITIALIZED 1 - -typedef struct { - int un_peercount; /* # of peers */ - int un_shutdown; /* shutdown flag */ - __u64 un_incarnation; /* my epoch */ - pthread_cond_t un_cond; /* condvar to wait for notifications */ - pthread_mutex_t un_lock; /* a lock to protect un_cond */ -} usock_net_t; - -typedef struct { - int ut_poll_timeout; /* the third arg for poll(2) (seconds) */ - int ut_timeout; /* "stuck" socket timeout (seconds) */ - int ut_npollthreads; /* number of poll thread to spawn */ - int ut_fair_limit; /* how many packets can we receive or transmit - * without calling poll(2) */ - int ut_min_bulk; /* smallest "large" message */ - int ut_txcredits; /* # concurrent sends */ - int ut_peertxcredits; /* # concurrent sends to 1 peer */ - int ut_socknagle; /* Is Nagle alg on ? 
*/ - int ut_sockbufsiz; /* size of socket buffers */ -} usock_tunables_t; - -extern usock_tunables_t usock_tuns; - -typedef struct usock_preq_s { - int upr_type; /* type of requested action */ - short upr_value; /* bitmask of POLLIN and POLLOUT bits */ - usock_conn_t * upr_conn; /* a conn for the sake of which - * action will be performed */ - struct list_head upr_list; /* neccessary to form list */ -} usock_pollrequest_t; - -/* Allowable poll request types are: */ -#define POLL_ADD_REQUEST 1 -#define POLL_DEL_REQUEST 2 -#define POLL_RX_SET_REQUEST 3 -#define POLL_TX_SET_REQUEST 4 -#define POLL_SET_REQUEST 5 - -typedef struct { - struct list_head zc_list; /* neccessary to form zc_ack list */ - __u64 zc_cookie; /* zero-copy cookie */ -} usock_zc_ack_t; - -static inline void -usocklnd_conn_addref(usock_conn_t *conn) -{ - LASSERT (cfs_atomic_read(&conn->uc_refcount) > 0); - cfs_atomic_inc(&conn->uc_refcount); -} - -void usocklnd_destroy_conn(usock_conn_t *conn); - -static inline void -usocklnd_conn_decref(usock_conn_t *conn) -{ - LASSERT (cfs_atomic_read(&conn->uc_refcount) > 0); - if (cfs_atomic_dec_and_test(&conn->uc_refcount)) - usocklnd_destroy_conn(conn); -} - -static inline void -usocklnd_peer_addref(usock_peer_t *peer) -{ - LASSERT (cfs_atomic_read(&peer->up_refcount) > 0); - cfs_atomic_inc(&peer->up_refcount); -} - -void usocklnd_destroy_peer(usock_peer_t *peer); - -static inline void -usocklnd_peer_decref(usock_peer_t *peer) -{ - LASSERT (cfs_atomic_read(&peer->up_refcount) > 0); - if (cfs_atomic_dec_and_test(&peer->up_refcount)) - usocklnd_destroy_peer(peer); -} - -static inline int -usocklnd_ip2pt_idx(__u32 ip) { - return ip % usock_data.ud_npollthreads; -} - -static inline struct list_head * -usocklnd_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % UD_PEER_HASH_SIZE; - - return &usock_data.ud_peers[hash]; -} - -int usocklnd_startup(lnet_ni_t *ni); -void usocklnd_shutdown(lnet_ni_t *ni); -int usocklnd_send(lnet_ni_t *ni, void 
*private, lnet_msg_t *lntmsg); -int usocklnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int usocklnd_accept(lnet_ni_t *ni, int sock_fd); - -int usocklnd_poll_thread(void *arg); -int usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value); -void usocklnd_add_killrequest(usock_conn_t *conn); -int usocklnd_process_pollrequest(usock_pollrequest_t *pr, - usock_pollthread_t *pt_data); -void usocklnd_execute_handlers(usock_pollthread_t *pt_data); -int usocklnd_calculate_chunk_size(int num); -void usocklnd_wakeup_pollthread(int i); - -int usocklnd_notifier_handler(int fd); -void usocklnd_exception_handler(usock_conn_t *conn); -int usocklnd_read_handler(usock_conn_t *conn); -int usocklnd_read_msg(usock_conn_t *conn, int *cont_flag); -int usocklnd_handle_zc_req(usock_peer_t *peer, __u64 cookie); -int usocklnd_read_hello(usock_conn_t *conn, int *cont_flag); -int usocklnd_activeconn_hellorecv(usock_conn_t *conn); -int usocklnd_passiveconn_hellorecv(usock_conn_t *conn); -int usocklnd_write_handler(usock_conn_t *conn); -usock_tx_t * usocklnd_try_piggyback(struct list_head *tx_list_p, - struct list_head *zcack_list_p); -int usocklnd_activeconn_hellosent(usock_conn_t *conn); -int usocklnd_passiveconn_hellosent(usock_conn_t *conn); -int usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx); -int usocklnd_read_data(usock_conn_t *conn); - -void usocklnd_release_poll_states(int n); -int usocklnd_base_startup(); -void usocklnd_base_shutdown(int n); -__u64 usocklnd_new_incarnation(); -void usocklnd_del_all_peers(lnet_ni_t *ni); -void usocklnd_del_peer_and_conns(usock_peer_t *peer); -void usocklnd_del_conns_locked(usock_peer_t *peer); - -int usocklnd_conn_timed_out(usock_conn_t *conn, cfs_time_t current_time); -void usocklnd_conn_kill(usock_conn_t *conn); -void usocklnd_conn_kill_locked(usock_conn_t *conn); -usock_conn_t 
*usocklnd_conn_allocate(); -void usocklnd_conn_free(usock_conn_t *conn); -void usocklnd_tear_peer_conn(usock_conn_t *conn); -void usocklnd_check_peer_stale(lnet_ni_t *ni, lnet_process_id_t id); -int usocklnd_create_passive_conn(lnet_ni_t *ni, int fd, usock_conn_t **connp); -int usocklnd_create_active_conn(usock_peer_t *peer, int type, - usock_conn_t **connp); -int usocklnd_connect_srv_mode(int *fdp, __u32 dst_ip, __u16 dst_port); -int usocklnd_connect_cli_mode(int *fdp, __u32 dst_ip, __u16 dst_port); -int usocklnd_set_sock_options(int fd); -void usocklnd_init_msg(ksock_msg_t *msg, int type); -usock_tx_t *usocklnd_create_noop_tx(__u64 cookie); -usock_tx_t *usocklnd_create_tx(lnet_msg_t *lntmsg); -void usocklnd_init_hello_msg(ksock_hello_msg_t *hello, - lnet_ni_t *ni, int type, lnet_nid_t peer_nid); -usock_tx_t *usocklnd_create_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid); -usock_tx_t *usocklnd_create_cr_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid); -void usocklnd_destroy_tx(lnet_ni_t *ni, usock_tx_t *tx); -void usocklnd_destroy_txlist(lnet_ni_t *ni, struct list_head *txlist); -void usocklnd_destroy_zcack_list(struct list_head *zcack_list); -void usocklnd_destroy_peer (usock_peer_t *peer); -int usocklnd_get_conn_type(lnet_msg_t *lntmsg); -int usocklnd_type2idx(int type); -usock_peer_t *usocklnd_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id); -int usocklnd_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp); -int usocklnd_find_or_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp); -int usocklnd_find_or_create_conn(usock_peer_t *peer, int type, - usock_conn_t **connp, - usock_tx_t *tx, usock_zc_ack_t *zc_ack, - int *send_immediately_flag); -void usocklnd_link_conn_to_peer(usock_conn_t *conn, usock_peer_t *peer, int idx); -int usocklnd_invert_type(int type); -void usocklnd_conn_new_state(usock_conn_t *conn, int new_state); -void usocklnd_cleanup_stale_conns(usock_peer_t *peer, __u64 incrn, - 
usock_conn_t *skip_conn); - -void usocklnd_rx_hellomagic_state_transition(usock_conn_t *conn); -void usocklnd_rx_helloversion_state_transition(usock_conn_t *conn); -void usocklnd_rx_hellobody_state_transition(usock_conn_t *conn); -void usocklnd_rx_helloIPs_state_transition(usock_conn_t *conn); -void usocklnd_rx_lnethdr_state_transition(usock_conn_t *conn); -void usocklnd_rx_ksmhdr_state_transition(usock_conn_t *conn); -void usocklnd_rx_skipping_state_transition(usock_conn_t *conn); diff --git a/lnet/ulnds/socklnd/usocklnd_cb.c b/lnet/ulnds/socklnd/usocklnd_cb.c deleted file mode 100644 index c1337e9c28199b64c00578f05f6dc6fbd250dadf..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/usocklnd_cb.c +++ /dev/null @@ -1,176 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Maxim Patlasov <maxim@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. 
- * - */ - -#include "usocklnd.h" - -static int -usocklnd_send_tx_immediately(usock_conn_t *conn, usock_tx_t *tx) -{ - int rc; - int rc2; - int partial_send = 0; - usock_peer_t *peer = conn->uc_peer; - - LASSERT (peer != NULL); - - /* usocklnd_enqueue_tx() turned it on for us */ - LASSERT(conn->uc_sending); - - //counter_imm_start++; - rc = usocklnd_send_tx(conn, tx); - if (rc == 0) { /* partial send or connection closed */ - pthread_mutex_lock(&conn->uc_lock); - list_add(&tx->tx_list, &conn->uc_tx_list); - conn->uc_sending = 0; - pthread_mutex_unlock(&conn->uc_lock); - partial_send = 1; - } else { - usocklnd_destroy_tx(peer->up_ni, tx); - /* NB: lnetmsg was finalized, so we *must* return 0 */ - - if (rc < 0) { /* real error */ - usocklnd_conn_kill(conn); - return 0; - } - - /* rc == 1: tx was sent completely */ - rc = 0; /* let's say to caller 'Ok' */ - //counter_imm_complete++; - } - - pthread_mutex_lock(&conn->uc_lock); - conn->uc_sending = 0; - - /* schedule write handler */ - if (partial_send || - (conn->uc_state == UC_READY && - (!list_empty(&conn->uc_tx_list) || - !list_empty(&conn->uc_zcack_list)))) { - conn->uc_tx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc2 = usocklnd_add_pollrequest(conn, POLL_TX_SET_REQUEST, POLLOUT); - if (rc2 != 0) - usocklnd_conn_kill_locked(conn); - else - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - } - - pthread_mutex_unlock(&conn->uc_lock); - - return rc; -} - -int -usocklnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - usock_tx_t *tx; - lnet_process_id_t target = lntmsg->msg_target; - usock_peer_t *peer; - int type; - int rc; - usock_conn_t *conn; - int send_immediately; - - tx = usocklnd_create_tx(lntmsg); - if (tx == NULL) - return -ENOMEM; - - rc = usocklnd_find_or_create_peer(ni, target, &peer); - if (rc) { - LIBCFS_FREE (tx, tx->tx_size); - return rc; - } - /* peer cannot disappear now because its refcount was incremented */ - - type = usocklnd_get_conn_type(lntmsg); - rc 
= usocklnd_find_or_create_conn(peer, type, &conn, tx, NULL, - &send_immediately); - if (rc != 0) { - usocklnd_peer_decref(peer); - usocklnd_check_peer_stale(ni, target); - LIBCFS_FREE (tx, tx->tx_size); - return rc; - } - /* conn cannot disappear now because its refcount was incremented */ - - if (send_immediately) - rc = usocklnd_send_tx_immediately(conn, tx); - - usocklnd_conn_decref(conn); - usocklnd_peer_decref(peer); - return rc; -} - -int -usocklnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - int rc = 0; - usock_conn_t *conn = (usock_conn_t *)private; - - /* I don't think that we'll win much concurrency moving lock() - * call below lnet_extract_iov() */ - pthread_mutex_lock(&conn->uc_lock); - - conn->uc_rx_lnetmsg = msg; - conn->uc_rx_nob_wanted = mlen; - conn->uc_rx_nob_left = rlen; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_niov = - lnet_extract_iov(LNET_MAX_IOV, conn->uc_rx_iov, - niov, iov, offset, mlen); - - /* the gap between lnet_parse() and usocklnd_recv() happened? 
*/ - if (conn->uc_rx_state == UC_RX_PARSE_WAIT) { - conn->uc_rx_flag = 1; /* waiting for incoming lnet payload */ - conn->uc_rx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - rc = usocklnd_add_pollrequest(conn, POLL_RX_SET_REQUEST, POLLIN); - if (rc != 0) { - usocklnd_conn_kill_locked(conn); - goto recv_out; - } - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - } - - conn->uc_rx_state = UC_RX_LNET_PAYLOAD; - recv_out: - pthread_mutex_unlock(&conn->uc_lock); - usocklnd_conn_decref(conn); - return rc; -} - -int -usocklnd_accept(lnet_ni_t *ni, int sock_fd) -{ - int rc; - usock_conn_t *conn; - - rc = usocklnd_create_passive_conn(ni, sock_fd, &conn); - if (rc) - return rc; - LASSERT(conn != NULL); - - /* disable shutdown event temporarily */ - lnet_ni_addref(ni); - - rc = usocklnd_add_pollrequest(conn, POLL_ADD_REQUEST, POLLIN); - if (rc == 0) - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - - /* NB: conn reference counter was incremented while adding - * poll request if rc == 0 */ - - usocklnd_conn_decref(conn); /* should destroy conn if rc != 0 */ - return rc; -} diff --git a/lnet/utils/.cvsignore b/lnet/utils/.cvsignore deleted file mode 100644 index eae12d5ab792150fe8b615194e8f3fb008fd7e1d..0000000000000000000000000000000000000000 --- a/lnet/utils/.cvsignore +++ /dev/null @@ -1,12 +0,0 @@ -Makefile -Makefile.in -acceptor -debugctl -ptlctl -.deps -routerstat -wirecheck -gmlndnid -lst -lstclient -.*.cmd diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am deleted file mode 100644 index 71d49b54d9739c50cbee0f23812fbeac53794f88..0000000000000000000000000000000000000000 --- a/lnet/utils/Makefile.am +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -## $(srcdir)/../ for <portals/*.h>, ../../ for generated <config.h> -#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include -#LINK = $(CC) -o $@ - -if LIBLUSTRE -noinst_LIBRARIES = libuptlctl.a -endif - -libuptlctl_a_SOURCES = portals.c nidstrings.c debug.c l_ioctl.c -libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS) -libuptlctl_a_CFLAGS = $(LLCFLAGS) -DLUSTRE_UTILS=1 - -sbin_PROGRAMS = debugctl - -lib_LIBRARIES = libptlctl.a - -if LIBLUSTRE -noinst_LIBRARIES += liblst.a -liblst_a_SOURCES = -endif - -libptlctl_a_SOURCES = portals.c nidstrings.c debug.c l_ioctl.c parser.c parser.h - -if UTILS -sbin_PROGRAMS += ptlctl routerstat wirecheck lst -if LIBLUSTRE -sbin_PROGRAMS += lstclient -endif - -if BUILD_GMLND -sbin_PROGRAMS += gmlndnid -endif -endif - -wirecheck_SOURCES = wirecheck.c - -gmlndnid_SOURCES = gmlndnid.c -gmlndnid_CFLAGS = $(GMCPPFLAGS) -gmlndnid_LDFLAGS = -static -gmlndnid_LDADD = $(GMLIBS) -lgm - -ptlctl_SOURCES = ptlctl.c -ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) -ptlctl_DEPENDENCIES = libptlctl.a - -routerstat_SOURCES = routerstat.c - -debugctl_SOURCES = debugctl.c -debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) -debugctl_DEPENDENCIES = libptlctl.a - -lst_SOURCES = lst.c -lst_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) -lst_DEPENDENCIES = libptlctl.a - -LND_LIBS = -if BUILD_USOCKLND -LND_LIBS += $(top_builddir)/lnet/ulnds/socklnd/libsocklnd.a -endif -if BUILD_UPTLLND -LND_LIBS += $(top_builddir)/lnet/ulnds/ptllnd/libptllnd.a -endif - -if LIBLUSTRE -LIB_SELFTEST = $(top_builddir)/lnet/libcfs/libcfs.a $(top_builddir)/lnet/lnet/liblnet.a $(top_builddir)/lnet/selftest/libselftest.a -liblst.a : $(LIB_SELFTEST) $(LND_LIBS) - sh $(srcdir)/genlib.sh "$(LIBS)" "$(LND_LIBS)" "$(PTHREAD_LIBS)" - -lstclient_SOURCES = lstclient.c -lstclient_LDADD = -L. 
-lptlctl -llst $(LIBREADLINE) $(LIBEFENCE) $(PTHREAD_LIBS) -lstclient_DEPENDENCIES = libptlctl.a liblst.a -endif - -nidstrings.c: @top_srcdir@/lnet/libcfs/nidstrings.c - ln -sf $< $@ - -EXTRA_DIST = genlib.sh diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c deleted file mode 100644 index 39af760e137bbde49bc3ffbceb4cf49fdc3b9eed..0000000000000000000000000000000000000000 --- a/lnet/utils/debug.c +++ /dev/null @@ -1,927 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre Networking, http://www.lustre.org. - * - * LNET is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * LNET is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with LNET; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. 
- * - */ - -#define __USE_FILE_OFFSET64 -#define _GNU_SOURCE - -#include <stdio.h> -#ifdef HAVE_NETDB_H -#include <netdb.h> -#endif -#include <stdlib.h> -#include <string.h> -#ifdef HAVE_SYS_IOCTL_H -#include <sys/ioctl.h> -#endif -#ifndef _IOWR -#include "ioctl.h" -#endif -#include <fcntl.h> -#include <errno.h> -#include <unistd.h> -#include <assert.h> - -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <sys/utsname.h> - -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include <libcfs/portals_utils.h> -#include "parser.h" - -#include <time.h> - -static char rawbuf[8192]; -static char *buf = rawbuf; -static int max = 8192; -/*static int g_pfd = -1;*/ -static int subsystem_mask = ~0; -static int debug_mask = ~0; - -#define MAX_MARK_SIZE 256 - -static const char *libcfs_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", - "ost", "class", "log", "llite", - "rpc", "mgmt", "lnet", "lnd", - "pinger", "filter", "", "echo", - "ldlm", "lov", "", "", - "", "", "", "lmv", - "", "sec", "gss", "", - "mgc", "mgs", "fid", "fld", NULL}; -static const char *libcfs_debug_masks[] = - {"trace", "inode", "super", "ext2", - "malloc", "cache", "info", "ioctl", - "neterror", "net", "warning", "buffs", - "other", "dentry", "nettrace", "page", - "dlmtrace", "error", "emerg", "ha", - "rpctrace", "vfstrace", "reada", "mmap", - "config", "console", "quota", "sec", NULL}; - -struct debug_daemon_cmd { - char *cmd; - unsigned int cmdv; -}; - -static const struct debug_daemon_cmd libcfs_debug_daemon_cmd[] = { - {"start", DEBUG_DAEMON_START}, - {"stop", DEBUG_DAEMON_STOP}, - {0, 0} -}; - -#ifdef __linux__ - -#define DAEMON_CTL_NAME "/proc/sys/lnet/daemon_file" -#define SUBSYS_DEBUG_CTL_NAME "/proc/sys/lnet/subsystem_debug" -#define DEBUG_CTL_NAME "/proc/sys/lnet/debug" -#define DUMP_KERNEL_CTL_NAME "/proc/sys/lnet/dump_kernel" - -static int -dbg_open_ctlhandle(const char *str) -{ - int fd; - fd = open(str, O_WRONLY); - if (fd 
< 0) { - fprintf(stderr, "open %s failed: %s\n", str, - strerror(errno)); - return -1; - } - return fd; -} - -static void -dbg_close_ctlhandle(int fd) -{ - close(fd); -} - -static int -dbg_write_cmd(int fd, char *str, int len) -{ - int rc = write(fd, str, len); - - return (rc == len ? 0 : 1); -} - -#elif defined(__DARWIN__) - -#define DAEMON_CTL_NAME "lnet.trace_daemon" -#define SUBSYS_DEBUG_CTL_NAME "lnet.subsystem_debug" -#define DEBUG_CTL_NAME "lnet.debug" -#define DUMP_KERNEL_CTL_NAME "lnet.trace_dumpkernel" - -static char sysctl_name[128]; -static int -dbg_open_ctlhandle(const char *str) -{ - - if (strlen(str)+1 > 128) { - fprintf(stderr, "sysctl name is too long: %s.\n", str); - return -1; - } - strcpy(sysctl_name, str); - - return 0; -} - -static void -dbg_close_ctlhandle(int fd) -{ - sysctl_name[0] = '\0'; - return; -} - -static int -dbg_write_cmd(int fd, char *str, int len) -{ - int rc; - - rc = sysctlbyname(sysctl_name, NULL, NULL, str, len+1); - if (rc != 0) { - fprintf(stderr, "sysctl %s with cmd (%s) error: %d\n", - sysctl_name, str, errno); - } - return (rc == 0 ? 0: 1); -} - -#else -#error - Unknown sysctl convention. -#endif - -static int do_debug_mask(char *name, int enable) -{ - int found = 0, i; - - for (i = 0; libcfs_debug_subsystems[i] != NULL; i++) { - if (strcasecmp(name, libcfs_debug_subsystems[i]) == 0 || - strcasecmp(name, "all_subs") == 0) { - printf("%s output from subsystem \"%s\"\n", - enable ? "Enabling" : "Disabling", - libcfs_debug_subsystems[i]); - if (enable) - subsystem_mask |= (1 << i); - else - subsystem_mask &= ~(1 << i); - found = 1; - } - } - for (i = 0; libcfs_debug_masks[i] != NULL; i++) { - if (strcasecmp(name, libcfs_debug_masks[i]) == 0 || - strcasecmp(name, "all_types") == 0) { - printf("%s output of type \"%s\"\n", - enable ? 
"Enabling" : "Disabling", - libcfs_debug_masks[i]); - if (enable) - debug_mask |= (1 << i); - else - debug_mask &= ~(1 << i); - found = 1; - } - } - - return found; -} - -int dbg_initialize(int argc, char **argv) -{ - return 0; -} - -int jt_dbg_filter(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 0)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - return 0; -} - -int jt_dbg_show(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 1)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - - return 0; -} - -static int applymask(char* procpath, int value) -{ - int rc; - char buf[64]; - int len = snprintf(buf, 64, "%d", value); - - int fd = dbg_open_ctlhandle(procpath); - if (fd == -1) { - fprintf(stderr, "Unable to open %s: %s\n", - procpath, strerror(errno)); - return fd; - } - rc = dbg_write_cmd(fd, buf, len+1); - if (rc != 0) { - fprintf(stderr, "Write to %s failed: %s\n", - procpath, strerror(errno)); - return rc; - } - dbg_close_ctlhandle(fd); - return 0; -} - -static void applymask_all(unsigned int subs_mask, unsigned int debug_mask) -{ - if (!dump_filename) { - applymask(SUBSYS_DEBUG_CTL_NAME, subs_mask); - applymask(DEBUG_CTL_NAME, debug_mask); - } else { - struct libcfs_debug_ioctl_data data; - - data.hdr.ioc_len = sizeof(data); - data.hdr.ioc_version = 0; - data.subs = subs_mask; - data.debug = debug_mask; - - dump(OBD_DEV_ID, LIBCFS_IOC_DEBUG_MASK, &data); - } - printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/lnet\n", - subs_mask, debug_mask); -} - -int jt_dbg_list(int argc, char **argv) -{ - int i; - - if (argc != 2) { - fprintf(stderr, "usage: %s <subs || types>\n", argv[0]); - return 0; - } - - 
if (strcasecmp(argv[1], "subs") == 0) { - printf("Subsystems: all_subs"); - for (i = 0; libcfs_debug_subsystems[i] != NULL; i++) - if (libcfs_debug_subsystems[i][0]) - printf(", %s", libcfs_debug_subsystems[i]); - printf("\n"); - } else if (strcasecmp(argv[1], "types") == 0) { - printf("Types: all_types"); - for (i = 0; libcfs_debug_masks[i] != NULL; i++) - printf(", %s", libcfs_debug_masks[i]); - printf("\n"); - } else if (strcasecmp(argv[1], "applymasks") == 0) { - applymask_all(subsystem_mask, debug_mask); - } - return 0; -} - -/* all strings nul-terminated; only the struct and hdr need to be freed */ -struct dbg_line { - struct ptldebug_header *hdr; - char *file; - char *fn; - char *text; -}; - -static int cmp_rec(const void *p1, const void *p2) -{ - struct dbg_line *d1 = *(struct dbg_line **)p1; - struct dbg_line *d2 = *(struct dbg_line **)p2; - - if (d1->hdr->ph_sec < d2->hdr->ph_sec) - return -1; - if (d1->hdr->ph_sec == d2->hdr->ph_sec && - d1->hdr->ph_usec < d2->hdr->ph_usec) - return -1; - if (d1->hdr->ph_sec == d2->hdr->ph_sec && - d1->hdr->ph_usec == d2->hdr->ph_usec) - return 0; - return 1; -} - -static void print_rec(struct dbg_line **linev, int used, FILE *out) -{ - int i; - - for (i = 0; i < used; i++) { - struct dbg_line *line = linev[i]; - struct ptldebug_header *hdr = line->hdr; - - fprintf(out, "%08x:%08x:%u:%u.%06llu:%u:%u:%u:(%s:%u:%s()) %s", - hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id, - hdr->ph_sec, (unsigned long long)hdr->ph_usec, - hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid, - line->file, hdr->ph_line_num, line->fn, line->text); - free(line->hdr); - free(line); - } - free(linev); -} - -static int add_rec(struct dbg_line *line, struct dbg_line ***linevp, int *lenp, - int used) -{ - struct dbg_line **linev = *linevp; - - if (used == *lenp) { - int nlen = *lenp + 512; - int nsize = nlen * sizeof(struct dbg_line *); - - linev = *linevp ? 
realloc(*linevp, nsize) : malloc(nsize); - if (!linev) - return 0; - *linevp = linev; - *lenp = nlen; - } - linev[used] = line; - return 1; -} - -static int parse_buffer(FILE *in, FILE *out) -{ - struct dbg_line *line; - struct ptldebug_header *hdr; - char buf[4097], *p; - int rc; - unsigned long dropped = 0, kept = 0; - struct dbg_line **linev = NULL; - int linev_len = 0; - - while (1) { - rc = fread(buf, sizeof(hdr->ph_len) + sizeof(hdr->ph_flags), 1, in); - if (rc <= 0) - break; - - hdr = (void *)buf; - if (hdr->ph_len == 0) - break; - if (hdr->ph_len > 4094) { - fprintf(stderr, "unexpected large record: %d bytes. " - "aborting.\n", - hdr->ph_len); - break; - } - - rc = fread(buf + sizeof(hdr->ph_len) + sizeof(hdr->ph_flags), 1, - hdr->ph_len - sizeof(hdr->ph_len) - sizeof(hdr->ph_flags), in); - if (rc <= 0) - break; - - if (hdr->ph_mask && - (!(subsystem_mask & hdr->ph_subsys) || - (!(debug_mask & hdr->ph_mask)))) { - dropped++; - continue; - } - - line = malloc(sizeof(*line)); - if (line == NULL) { - fprintf(stderr, "malloc failed; printing accumulated " - "records and exiting.\n"); - break; - } - - line->hdr = malloc(hdr->ph_len + 1); - if (line->hdr == NULL) { - free(line); - fprintf(stderr, "malloc failed; printing accumulated " - "records and exiting.\n"); - break; - } - - p = (void *)line->hdr; - memcpy(line->hdr, buf, hdr->ph_len); - p[hdr->ph_len] = '\0'; - - p += sizeof(*hdr); - line->file = p; - p += strlen(line->file) + 1; - line->fn = p; - p += strlen(line->fn) + 1; - line->text = p; - - if (!add_rec(line, &linev, &linev_len, kept)) { - fprintf(stderr, "malloc failed; printing accumulated " - "records and exiting.\n"); - break; - } - kept++; - } - - if (linev) { - qsort(linev, kept, sizeof(struct dbg_line *), cmp_rec); - print_rec(linev, kept, out); - } - - printf("Debug log: %lu lines, %lu kept, %lu dropped.\n", - dropped + kept, kept, dropped); - return 0; -} - -int jt_dbg_debug_kernel(int argc, char **argv) -{ - char filename[4096]; - struct stat 
st; - int rc, raw = 0, fd; - FILE *in, *out = stdout; - - if (argc > 3) { - fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]); - return 0; - } - - if (argc > 2) { - raw = atoi(argv[2]); - } else if (argc > 1 && (argv[1][0] == '0' || argv[1][0] == '1')) { - raw = atoi(argv[1]); - argc--; - } - - /* If we are dumping raw (which means no conversion step to ASCII) - * then dump directly to any supplied filename, otherwise this is - * just a temp file and we dump to the real file at convert time. */ - if (argc > 1 && raw) - strcpy(filename, argv[1]); - else - sprintf(filename, "/tmp/lustre-log."CFS_TIME_T".%u", - time(NULL),getpid()); - - if (stat(filename, &st) == 0 && S_ISREG(st.st_mode)) - unlink(filename); - - fd = dbg_open_ctlhandle(DUMP_KERNEL_CTL_NAME); - if (fd < 0) { - fprintf(stderr, "open(dump_kernel) failed: %s\n", - strerror(errno)); - return 1; - } - - rc = dbg_write_cmd(fd, filename, strlen(filename)); - if (rc != 0) { - fprintf(stderr, "write(%s) failed: %s\n", filename, - strerror(errno)); - close(fd); - return 1; - } - dbg_close_ctlhandle(fd); - - if (raw) - return 0; - - in = fopen(filename, "r"); - if (in == NULL) { - if (errno == ENOENT) /* no dump file created */ - return 0; - - fprintf(stderr, "fopen(%s) failed: %s\n", filename, - strerror(errno)); - return 1; - } - if (argc > 1) { - out = fopen(argv[1], "w"); - if (out == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - fclose(in); - return 1; - } - } - - rc = parse_buffer(in, out); - fclose(in); - if (argc > 1) - fclose(out); - if (rc) { - fprintf(stderr, "parse_buffer failed; leaving tmp file %s " - "behind.\n", filename); - } else { - rc = unlink(filename); - if (rc) - fprintf(stderr, "dumped successfully, but couldn't " - "unlink tmp file %s: %s\n", filename, - strerror(errno)); - } - return rc; -} - -int jt_dbg_debug_file(int argc, char **argv) -{ - int fdin; - int fdout; - FILE *in; - FILE *out = stdout; - int rc; - - if (argc > 3 || argc < 2) { - 
fprintf(stderr, "usage: %s <input> [output]\n", argv[0]); - return 0; - } - - fdin = open(argv[1], O_RDONLY | O_LARGEFILE); - if (fdin == -1) { - fprintf(stderr, "open(%s) failed: %s\n", argv[1], - strerror(errno)); - return 1; - } - in = fdopen(fdin, "r"); - if (in == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - close(fdin); - return 1; - } - if (argc > 2) { - fdout = open(argv[2], - O_CREAT | O_TRUNC | O_WRONLY | O_LARGEFILE, - 0600); - if (fdout == -1) { - fprintf(stderr, "open(%s) failed: %s\n", argv[2], - strerror(errno)); - fclose(in); - return 1; - } - out = fdopen(fdout, "w"); - if (out == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], - strerror(errno)); - fclose(in); - close(fdout); - return 1; - } - } - - rc = parse_buffer(in, out); - - fclose(in); - if (out != stdout) - fclose(out); - - return rc; -} - -const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n"; - -int jt_dbg_debug_daemon(int argc, char **argv) -{ - int rc; - int fd; - - if (argc <= 1) { - fprintf(stderr, debug_daemon_usage, argv[0]); - return 1; - } - - fd = dbg_open_ctlhandle(DAEMON_CTL_NAME); - if (fd < 0) - return -1; - - rc = -1; - if (strcasecmp(argv[1], "start") == 0) { - if (argc < 3 || argc > 4 || - (argc == 4 && strlen(argv[3]) > 5)) { - fprintf(stderr, debug_daemon_usage, argv[0]); - goto out; - } - if (argc == 4) { - char buf[12]; - const long min_size = 10; - const long max_size = 20480; - long size; - char *end; - - size = strtoul(argv[3], &end, 0); - if (size < min_size || - size > max_size || - *end != 0) { - fprintf(stderr, "size %s invalid, must be in " - "the range %ld-%ld MB\n", argv[3], - min_size, max_size); - goto out; - } - snprintf(buf, sizeof(buf), "size=%ld", size); - rc = dbg_write_cmd(fd, buf, strlen(buf)); - - if (rc != 0) { - fprintf(stderr, "set %s failed: %s\n", - buf, strerror(errno)); - goto out; - } - } - - rc = dbg_write_cmd(fd, argv[2], strlen(argv[2])); - if (rc != 0) { - 
fprintf(stderr, "start debug_daemon on %s failed: %s\n", - argv[2], strerror(errno)); - goto out; - } - rc = 0; - goto out; - } - if (strcasecmp(argv[1], "stop") == 0) { - rc = dbg_write_cmd(fd, "stop", 4); - if (rc != 0) { - fprintf(stderr, "stopping debug_daemon failed: %s\n", - strerror(errno)); - goto out; - } - - rc = 0; - goto out; - } - - fprintf(stderr, debug_daemon_usage, argv[0]); - rc = -1; -out: - dbg_close_ctlhandle(fd); - return rc; -} - -int jt_dbg_clear_debug_buf(int argc, char **argv) -{ - int rc; - struct libcfs_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (libcfs_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "libcfs_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CLEAR_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_LIBCFS_CLEAR_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -int jt_dbg_mark_debug_buf(int argc, char **argv) -{ - static char scratch[MAX_MARK_SIZE] = { '\0' }; - int rc, max_size = MAX_MARK_SIZE-1; - struct libcfs_ioctl_data data = { 0 }; - char *text; - time_t now = time(NULL); - - if (argc > 1) { - int count; - text = scratch; - strncpy(text, argv[1], max_size); - max_size-=strlen(argv[1]); - for (count = 2; (count < argc) && (max_size > 0); count++){ - strncat(text, " ", max_size); - max_size -= 1; - strncat(text, argv[count], max_size); - max_size -= strlen(argv[count]); - } - } else { - text = ctime(&now); - } - - data.ioc_inllen1 = strlen(text) + 1; - data.ioc_inlbuf1 = text; - if (libcfs_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "libcfs_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_MARK_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_LIBCFS_MARK_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -static struct mod_paths { - char *name, *path; -} mod_paths[] = { - {"libcfs", "lnet/libcfs"}, - 
{"lnet", "lnet/lnet"}, - {"kciblnd", "lnet/klnds/ciblnd"}, - {"kgmlnd", "lnet/klnds/gmlnd"}, - {"kmxlnd", "lnet/klnds/mxlnd"}, - {"kiiblnd", "lnet/klnds/iiblnd"}, - {"ko2iblnd", "lnet/klnds/o2iblnd"}, - {"kopeniblnd", "lnet/klnds/openiblnd"}, - {"kptllnd", "lnet/klnds/ptllnd"}, - {"kqswlnd", "lnet/klnds/qswlnd"}, - {"kralnd", "lnet/klnds/ralnd"}, - {"ksocklnd", "lnet/klnds/socklnd"}, - {"ktdilnd", "lnet/klnds/tdilnd"}, - {"kviblnd", "lnet/klnds/viblnd"}, - {"lvfs", "lustre/lvfs"}, - {"obdclass", "lustre/obdclass"}, - {"llog_test", "lustre/obdclass"}, - {"ptlrpc_gss", "lustre/ptlrpc/gss"}, - {"ptlrpc", "lustre/ptlrpc"}, - {"gks", "lustre/sec/gks"}, - {"gkc", "lustre/sec/gks"}, - {"ost", "lustre/ost"}, - {"osc", "lustre/osc"}, - {"mds", "lustre/mds"}, - {"mdc", "lustre/mdc"}, - {"llite", "lustre/llite"}, - {"lustre", "lustre/llite"}, - {"llite_lloop", "lustre/llite"}, - {"ldiskfs", "ldiskfs/ldiskfs"}, - {"smfs", "lustre/smfs"}, - {"obdecho", "lustre/obdecho"}, - {"ldlm", "lustre/ldlm"}, - {"obdfilter", "lustre/obdfilter"}, - {"lov", "lustre/lov"}, - {"lmv", "lustre/lmv"}, - {"fsfilt_ext3", "lustre/lvfs"}, - {"fsfilt_reiserfs", "lustre/lvfs"}, - {"fsfilt_smfs", "lustre/lvfs"}, - {"fsfilt_ldiskfs", "lustre/lvfs"}, - {"mds_ext3", "lustre/mds"}, - {"cobd", "lustre/cobd"}, - {"cmobd", "lustre/cmobd"}, - {"lquota", "lustre/quota"}, - {"mgs", "lustre/mgs"}, - {"mgc", "lustre/mgc"}, - {"mdt", "lustre/mdt"}, - {"mdd", "lustre/mdd"}, - {"osd", "lustre/osd"}, - {"cmm", "lustre/cmm"}, - {"fid", "lustre/fid"}, - {"fld", "lustre/fld"}, - {NULL, NULL} -}; - -static int jt_dbg_modules_2_4(int argc, char **argv) -{ -#ifdef HAVE_LINUX_VERSION_H -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct mod_paths *mp; - char *path = ""; - char *kernel = "linux"; - - if (argc >= 2) - path = argv[1]; - if (argc == 3) - kernel = argv[2]; - if (argc > 3) { - printf("%s [path] [kernel]\n", argv[0]); - return 0; - } - - for (mp = mod_paths; mp->name != NULL; mp++) { - struct module_info 
info; - int rc; - size_t crap; - int query_module(const char *name, int which, void *buf, - size_t bufsize, size_t *ret); - - rc = query_module(mp->name, QM_INFO, &info, sizeof(info), - &crap); - if (rc < 0) { - if (errno != ENOENT) - printf("query_module(%s) failed: %s\n", - mp->name, strerror(errno)); - } else { - printf("add-symbol-file %s%s%s/%s.o 0x%0lx\n", path, - path[0] ? "/" : "", mp->path, mp->name, - info.addr + sizeof(struct module)); - } - } - - return 0; -#endif // Headers are 2.6-only -#endif // !HAVE_LINUX_VERSION_H - return -EINVAL; -} - -static int jt_dbg_modules_2_5(int argc, char **argv) -{ - struct mod_paths *mp; - char *path = ""; - char *kernel = "linux"; - const char *proc = "/proc/modules"; - char modname[128], others[4096]; - long modaddr; - int rc; - FILE *file; - - if (argc >= 2) - path = argv[1]; - if (argc == 3) - kernel = argv[2]; - if (argc > 3) { - printf("%s [path] [kernel]\n", argv[0]); - return 0; - } - - file = fopen(proc, "r"); - if (!file) { - printf("failed open %s: %s\n", proc, strerror(errno)); - return 0; - } - - while ((rc = fscanf(file, "%s %s %s %s %s %lx\n", - modname, others, others, others, others, &modaddr)) == 6) { - for (mp = mod_paths; mp->name != NULL; mp++) { - if (!strcmp(mp->name, modname)) - break; - } - if (mp->name) { - printf("add-symbol-file %s%s%s/%s.o 0x%0lx\n", path, - path[0] ? 
"/" : "", mp->path, mp->name, modaddr); - } - } - - fclose(file); - return 0; -} - -int jt_dbg_modules(int argc, char **argv) -{ - int rc = 0; - struct utsname sysinfo; - - rc = uname(&sysinfo); - if (rc) { - printf("uname() failed: %s\n", strerror(errno)); - return 0; - } - - if (sysinfo.release[2] > '4') { - return jt_dbg_modules_2_5(argc, argv); - } else { - return jt_dbg_modules_2_4(argc, argv); - } - - return 0; -} - -int jt_dbg_panic(int argc, char **argv) -{ - int rc; - struct libcfs_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (libcfs_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "libcfs_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PANIC, buf); - if (rc) { - fprintf(stderr, "IOC_LIBCFS_PANIC failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} diff --git a/lnet/utils/debugctl.c b/lnet/utils/debugctl.c deleted file mode 100644 index cf70fd8b115b9cb68135eb5cc38f9e1dee9fd170..0000000000000000000000000000000000000000 --- a/lnet/utils/debugctl.c +++ /dev/null @@ -1,67 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include "parser.h" - - -command_t list[] = { - {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"}, - {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file|stop], control debug daemon to dump debug buffer to a file"}, - {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"}, - {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"}, - {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"}, - {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"}, - {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"}, - {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"}, - {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"}, - {"panic", jt_dbg_panic, 0, "cause the kernel to panic"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (dbg_initialize(argc, argv) < 0) - exit(2); - - register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH, - LNET_DEV_MAJOR, LNET_DEV_MINOR); - - Parser_init("debugctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - unregister_ioc_dev(LNET_DEV_ID); - return 
0; -} diff --git a/lnet/utils/genlib.sh b/lnet/utils/genlib.sh deleted file mode 100755 index 66acf6a3b7b1995cab56f142d6beb8a6cee999d1..0000000000000000000000000000000000000000 --- a/lnet/utils/genlib.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -#set -xv -set -e - -AR=/usr/bin/ar -LD=/usr/bin/ld -RANLIB=/usr/bin/ranlib - -CWD=`pwd` - -LIBS=$1 -LND_LIBS=$2 -PTHREAD_LIBS=$3 - -# do cleanup at first -rm -f liblst.so - -ALL_OBJS= - -build_obj_list() { - _objs=`$AR -t $1/$2` - for _lib in $_objs; do - ALL_OBJS=$ALL_OBJS"$1/$_lib "; - done; -} - -# lnet components libs -build_obj_list ../../lnet/libcfs libcfs.a -if $(echo "$LND_LIBS" | grep "socklnd" >/dev/null) ; then - build_obj_list ../../lnet/ulnds/socklnd libsocklnd.a -fi -if $(echo "$LND_LIBS" | grep "ptllnd" >/dev/null) ; then - build_obj_list ../../lnet/ulnds/ptllnd libptllnd.a -fi -build_obj_list ../../lnet/lnet liblnet.a -build_obj_list ../../lnet/selftest libselftest.a - -# create static lib lustre -rm -f $CWD/liblst.a -$AR -cru $CWD/liblst.a $ALL_OBJS -$RANLIB $CWD/liblst.a diff --git a/lnet/utils/gmlndnid.c b/lnet/utils/gmlndnid.c deleted file mode 100644 index 24474b48cf8eef857e3248bc83d176b84e78d8fc..0000000000000000000000000000000000000000 --- a/lnet/utils/gmlndnid.c +++ /dev/null @@ -1,185 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * - * This file is part of Lustre, http://www.lustre.org/ - * - * This file is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <stdio.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/tcp.h> -#include <netdb.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <unistd.h> -#include <syslog.h> -#include <errno.h> - -#include <lnet/api-support.h> -#include <lnet/lib-types.h> - -#include <gm.h> - -/* - * portals always uses unit 0 - * Can this be configurable? - */ -#define GM_UNIT 0 - -void -usage(char *prg, int h) -{ - fprintf(stderr, - "usage %s -h\n" - " %s [-l] [-n hostname] [-L] [hostnames]\n", prg); - - if (h) - printf("Print Myrinet Global network ids for specified hosts\n" - "-l print local host's ID\n" - "-n hostname print given host's ID\n" - "-L print Myringet local net ID too\n" - "[hostnames] print ids of given hosts (local if none)\n"); -} - -gm_status_t -print_gmid(char *name, int name_fieldlen, int show_local_id) -{ - struct gm_port *gm_port; - int gm_port_id; - gm_status_t gm_status; - unsigned int local_id; - unsigned int global_id; - - gm_status = gm_init(); - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_init: %s\n", gm_strerror(gm_status)); - return gm_status; - } - - gm_port_id = 2; - gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id, "gmnalnid", - GM_API_VERSION); - if (gm_status != GM_SUCCESS) { - int num_ports = gm_num_ports(gm_port); - - /* Couldn't open port 2, try 4 ... 
num_ports */ - for (gm_port_id = 4; gm_port_id < num_ports; gm_port_id++) { - gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id, - "gmnalnid", GM_API_VERSION); - if (gm_status == GM_SUCCESS) - break; - } - - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_open: %s\n",gm_strerror(gm_status)); - goto out_0; - } - } - - if (name == NULL) { - local_id = 1; - name = "<local>"; - } else { - gm_status = gm_host_name_to_node_id_ex(gm_port, 1000000, name, - &local_id); - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_host_name_to_node_id_ex(%s): %s\n", - name, gm_strerror(gm_status)); - goto out_1; - } - } - - gm_status = gm_node_id_to_global_id(gm_port, local_id, &global_id) ; - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_node_id_to_global_id(%s:%d): %s\n", - name, local_id, gm_strerror(gm_status)); - goto out_1; - } - - if (name_fieldlen > 0) - printf ("%*s ", name_fieldlen, name); - - if (!show_local_id) - printf("0x%x\n", global_id); - else - printf("local 0x%x global 0x%x\n", local_id, global_id); - - out_1: - gm_close(gm_port); - out_0: - gm_finalize(); - - return gm_status; -} - -int -main (int argc, char **argv) -{ - int c; - gm_status_t gmrc; - int rc; - int max_namelen = 0; - int show_local_id = 0; - - while ((c = getopt(argc, argv, "n:lLh")) != -1) - switch(c) { - case 'h': - usage(argv[0], 1); - return 0; - - case 'L': - show_local_id = 1; - break; - - case 'n': - gmrc = print_gmid(optarg, 0, show_local_id); - return (gmrc == GM_SUCCESS) ? 0 : 1; - - case 'l': - gmrc = print_gmid(NULL, 0, show_local_id); - return (gmrc == GM_SUCCESS) ? 0 : 1; - - default: - usage(argv[0], 0); - return 2; - } - - if (optind == argc) { - gmrc = print_gmid(NULL, 0, show_local_id); - return (gmrc == GM_SUCCESS) ? 
0 : 1; - } - - if (optind != argc - 1) - for (c = optind; c < argc; c++) - if (strlen(argv[c]) > max_namelen) - max_namelen = strlen(argv[c]); - - rc = 0; - - for (c = optind; c < argc; c++) { - gmrc = print_gmid(argv[c], max_namelen, show_local_id); - - if (gmrc != GM_SUCCESS) - rc = 1; - } - - return rc; -} diff --git a/lnet/utils/l_ioctl.c b/lnet/utils/l_ioctl.c deleted file mode 100644 index 0bdb7826ee397b9b2fca5dda9de851bad999f4ee..0000000000000000000000000000000000000000 --- a/lnet/utils/l_ioctl.c +++ /dev/null @@ -1,358 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define __USE_FILE_OFFSET64 - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/mman.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <unistd.h> - -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include <libcfs/portals_utils.h> - - -static ioc_handler_t do_ioctl; /* forward ref */ -static ioc_handler_t *current_ioc_handler = &do_ioctl; - -struct ioc_dev { - const char * dev_name; - int dev_fd; - int dev_major; - int dev_minor; -}; - -static struct ioc_dev ioc_dev_list[10]; - -struct dump_hdr { - int magic; - int dev_id; - unsigned int opc; -}; - -char *dump_filename; - -void -set_ioc_handler (ioc_handler_t *handler) -{ - if (handler == NULL) - current_ioc_handler = do_ioctl; - else - current_ioc_handler = handler; -} - -/* Catamount has no <linux/kdev_t.h>, so just define it here */ -#ifndef MKDEV -# define MKDEV(a,b) (((a) << 8) | (b)) -#endif - -static int -open_ioc_dev(int dev_id) -{ - const char * dev_name; - - if (dev_id < 0 || - dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0])) - return -EINVAL; - - dev_name = ioc_dev_list[dev_id].dev_name; - if (dev_name == NULL) { - fprintf(stderr, "unknown device id: %d\n", dev_id); - return -EINVAL; - } - - if (ioc_dev_list[dev_id].dev_fd < 0) { - int fd = open(dev_name, O_RDWR); - - /* Make the /dev/ node if we need to */ - if (fd < 0 && errno == ENOENT) { - if (mknod(dev_name, - S_IFCHR|S_IWUSR|S_IRUSR, - MKDEV(ioc_dev_list[dev_id].dev_major, - ioc_dev_list[dev_id].dev_minor)) == 0) - fd = open(dev_name, O_RDWR); - else - fprintf(stderr, "mknod %s failed: %s\n", - dev_name, strerror(errno)); - } - - if (fd < 0) { - fprintf(stderr, "opening %s failed: %s\n" - "hint: the kernel modules may not be loaded\n", - dev_name, strerror(errno)); - return fd; - } - ioc_dev_list[dev_id].dev_fd = fd; - } - - return ioc_dev_list[dev_id].dev_fd; -} - - -static int -do_ioctl(int dev_id, unsigned int opc, 
void *buf) -{ - int fd, rc; - - fd = open_ioc_dev(dev_id); - if (fd < 0) - return fd; - - rc = ioctl(fd, opc, buf); - return rc; - -} - -static FILE * -get_dump_file() -{ - FILE *fp = NULL; - - if (!dump_filename) { - fprintf(stderr, "no dump filename\n"); - } else - fp = fopen(dump_filename, "a"); - return fp; -} - -/* - * The dump file should start with a description of which devices are - * used, but for now it will assumed whatever app reads the file will - * know what to do. */ -int -dump(int dev_id, unsigned int opc, void *buf) -{ - FILE *fp; - struct dump_hdr dump_hdr; - struct libcfs_ioctl_hdr * ioc_hdr = (struct libcfs_ioctl_hdr *) buf; - int rc; - - printf("dumping opc %x to %s\n", opc, dump_filename); - - - dump_hdr.magic = 0xdeadbeef; - dump_hdr.dev_id = dev_id; - dump_hdr.opc = opc; - - fp = get_dump_file(); - if (fp == NULL) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - rc = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp); - if (rc == 1) - rc = fwrite(buf, ioc_hdr->ioc_len, 1, fp); - fclose(fp); - if (rc != 1) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - return 0; -} - -/* register a device to send ioctls to. 
*/ -int -register_ioc_dev(int dev_id, const char * dev_name, int major, int minor) -{ - - if (dev_id < 0 || - dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0])) - return -EINVAL; - - unregister_ioc_dev(dev_id); - - ioc_dev_list[dev_id].dev_name = dev_name; - ioc_dev_list[dev_id].dev_fd = -1; - ioc_dev_list[dev_id].dev_major = major; - ioc_dev_list[dev_id].dev_minor = minor; - - return dev_id; -} - -void -unregister_ioc_dev(int dev_id) -{ - - if (dev_id < 0 || - dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0])) - return; - if (ioc_dev_list[dev_id].dev_name != NULL && - ioc_dev_list[dev_id].dev_fd >= 0) - close(ioc_dev_list[dev_id].dev_fd); - - ioc_dev_list[dev_id].dev_name = NULL; - ioc_dev_list[dev_id].dev_fd = -1; -} - -/* If this file is set, then all ioctl buffers will be - appended to the file. */ -int -set_ioctl_dump(char * file) -{ - if (dump_filename) - free(dump_filename); - - dump_filename = strdup(file); - if (dump_filename == NULL) - abort(); - - set_ioc_handler(&dump); - return 0; -} - -int -l_ioctl(int dev_id, unsigned int opc, void *buf) -{ - return current_ioc_handler(dev_id, opc, buf); -} - -/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer - * in the file. For example: - * - * parse_dump("lctl.dump", l_ioctl); - * - * Note: if using l_ioctl, then you also need to register_ioc_dev() for - * each device used in the dump. 
- */ -int -parse_dump(char * dump_file, ioc_handler_t ioc_func) -{ - int line =0; - struct stat st; - char *start, *buf, *end; -#ifndef __CYGWIN__ - int fd; -#else - HANDLE fd, hmap; - DWORD size; -#endif - -#ifndef __CYGWIN__ - fd = syscall(SYS_open, dump_file, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "couldn't open %s: %s\n", dump_file, - strerror(errno)); - exit(1); - } - - if (fstat(fd, &st)) { - perror("stat fails"); - exit(1); - } - - if (st.st_size < 1) { - fprintf(stderr, "KML is empty\n"); - exit(1); - } - - start = buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0); - end = start + st.st_size; - close(fd); - if (start == MAP_FAILED) { - fprintf(stderr, "can't create file mapping\n"); - exit(1); - } -#else - fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL, - OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - size = GetFileSize(fd, NULL); - if (size < 1) { - fprintf(stderr, "KML is empty\n"); - exit(1); - } - - hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL); - start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0); - end = buf + size; - CloseHandle(fd); - if (start == NULL) { - fprintf(stderr, "can't create file mapping\n"); - exit(1); - } -#endif /* __CYGWIN__ */ - - while (buf < end) { - struct dump_hdr *dump_hdr = (struct dump_hdr *) buf; - struct libcfs_ioctl_hdr * data; - char tmp[8096]; - int rc; - - line++; - - data = (struct libcfs_ioctl_hdr *) (buf + sizeof(*dump_hdr)); - if (buf + data->ioc_len > end ) { - fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf, - data->ioc_len, end); - return -1; - } -#if 0 - printf ("dump_hdr: %lx data: %lx\n", - (unsigned long)dump_hdr - (unsigned long)buf, (unsigned long)data - (unsigned long)buf); - - printf("%d: opcode %x len: %d ver: %x ", line, dump_hdr->opc, - data->ioc_len, data->ioc_version); -#endif - - memcpy(tmp, data, data->ioc_len); - - rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp); - if (rc) { - printf("failed: %d\n", rc); - exit(1); - } - - buf += 
data->ioc_len + sizeof(*dump_hdr); - } - -#ifndef __CYGWIN__ - munmap(start, end - start); -#else - UnmapViewOfFile(start); - CloseHandle(hmap); -#endif - - return 0; -} - -int -jt_ioc_dump(int argc, char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - printf("setting dumpfile to: %s\n", argv[1]); - - set_ioctl_dump(argv[1]); - return 0; -} diff --git a/lnet/utils/lbstats b/lnet/utils/lbstats deleted file mode 100755 index a8f08575eb155556664885057cfea9b458a84a66..0000000000000000000000000000000000000000 --- a/lnet/utils/lbstats +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -echo "=== Router Buffers =======" -test -e /proc/sys/lnet/buffers && cat /proc/sys/lnet/buffers -echo -echo "=== NIs ============================================" -test -e /proc/sys/lnet/nis && cat /proc/sys/lnet/nis -echo -echo "=== Peers =============================================================" -test -e /proc/sys/lnet/peers && cat /proc/sys/lnet/peers -echo diff --git a/lnet/utils/lnetunload b/lnet/utils/lnetunload deleted file mode 100755 index 81411782565c1bffdcd4b3d8831a64e0158ea9cf..0000000000000000000000000000000000000000 --- a/lnet/utils/lnetunload +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh - -lnds=$(echo k{sock,qsw,gm,{open,i,v,o2,c}ib,ra,ptl,mx}lnd) - -do_rmmod() { - mod=$1 - if grep "^$mod" /proc/modules >/dev/null 2>&1; then - rmmod $mod - fi -} - -do_rmmod lnet_selftest - -if lctl network down > /dev/null 2>&1; then - for mod in $lnds; do do_rmmod $mod; done - - rmmod lnet - rmmod libcfs -fi diff --git a/lnet/utils/lst.c b/lnet/utils/lst.c deleted file mode 100644 index 1c16bdc3435972f60b0a0ceb4268d4fb923888f2..0000000000000000000000000000000000000000 --- a/lnet/utils/lst.c +++ /dev/null @@ -1,3163 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - */ - 
-#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <getopt.h> -#include <errno.h> -#include <pwd.h> -#include <lnet/lnetctl.h> -#include <lnet/lnetst.h> -#include "parser.h" - -static command_t lst_cmdlist[]; -static lst_sid_t session_id; -static int session_key; -static lstcon_trans_stat_t trans_stat; - -typedef struct list_string { - struct list_string *lstr_next; - int lstr_sz; - char lstr_str[0]; -} lstr_t; - -#define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) - -static int alloc_count = 0; -static int alloc_nob = 0; - -lstr_t * -alloc_lstr(int sz) -{ - lstr_t *lstr = malloc(offsetof(lstr_t, lstr_str[sz])); - - if (lstr == NULL) { - fprintf(stderr, "Can't allocate lstr\n"); - abort(); - } - - alloc_nob += sz; - alloc_count++; - - lstr->lstr_str[0] = 0; - lstr->lstr_sz = sz; - return lstr; -} - -void -free_lstr(lstr_t *lstr) -{ - alloc_count--; - alloc_nob -= lstr->lstr_sz; - free(lstr); -} - -void -free_lstrs(lstr_t **list) -{ - lstr_t *lstr; - - while ((lstr = *list) != NULL) { - *list = lstr->lstr_next; - free_lstr(lstr); - } -} - -void -new_lstrs(lstr_t **list, char *prefix, char *postfix, - int lo, int hi, int stride) -{ - int n1 = strlen(prefix); - int n2 = strlen(postfix); - int sz = n1 + 20 + n2 + 1; - - do { - lstr_t *n = alloc_lstr(sz); - - snprintf(n->lstr_str, sz - 1, "%s%u%s", - prefix, lo, postfix); - - n->lstr_next = *list; - *list = n; - - lo += stride; - } while (lo <= hi); -} - -int -expand_lstr(lstr_t **list, lstr_t *l) -{ - int nob = strlen(l->lstr_str); - char *b1; - char *b2; - char *expr; - char *sep; - int x; - int y; - int z; - int n; - - b1 = strchr(l->lstr_str, '['); - if (b1 == NULL) { - l->lstr_next = *list; - *list = l; - return 0; - } - - b2 = strchr(b1, ']'); - if (b2 == NULL || b2 == b1 + 1) - return -1; - - *b1++ = 0; - *b2++ = 0; - expr = b1; - do { - - sep = strchr(expr, ','); - if (sep != NULL) - *sep++ = 0; - - nob = strlen(expr); - n = nob; - if 
(sscanf(expr, "%u%n", &x, &n) >= 1 && n == nob) { - /* simple number */ - new_lstrs(list, l->lstr_str, b2, x, x, 1); - continue; - } - - n = nob; - if (sscanf(expr, "%u-%u%n", &x, &y, &n) >= 2 && n == nob && - x < y) { - /* simple range */ - new_lstrs(list, l->lstr_str, b2, x, y, 1); - continue; - } - - n = nob; - if (sscanf(expr, "%u-%u/%u%n", &x, &y, &z, &n) >= 3 && n == nob && - x < y) { - /* strided range */ - new_lstrs(list, l->lstr_str, b2, x, y, z); - continue; - } - - /* syntax error */ - return -1; - } while ((expr = sep) != NULL); - - free_lstr(l); - - return 1; -} - -int -expand_strs(char *str, lstr_t **head) -{ - lstr_t *list = NULL; - lstr_t *nlist; - lstr_t *l; - int rc = 0; - int expanded; - - l = alloc_lstr(strlen(str) + 1); - memcpy(l->lstr_str, str, strlen(str) + 1); - l->lstr_next = NULL; - list = l; - - do { - expanded = 0; - nlist = NULL; - - while ((l = list) != NULL) { - list = l->lstr_next; - - rc = expand_lstr(&nlist, l); - if (rc < 0) { - fprintf(stderr, "Syntax error in \"%s\"\n", str); - free_lstr(l); - break; - } - - expanded |= rc > 0; - } - - /* re-order onto 'list' */ - while ((l = nlist) != NULL) { - nlist = l->lstr_next; - l->lstr_next = list; - list = l; - } - - } while (expanded && rc > 0); - - if (rc >= 0) { - *head = list; - return 0; - } - - while ((l = list) != NULL) { - list = l->lstr_next; - - free_lstr(l); - } - return rc; -} - -int -lst_parse_nids(char *str, int *countp, lnet_process_id_t **idspp) -{ - lstr_t *head = NULL; - lstr_t *l; - int c = 0; - int i; - int rc; - - rc = expand_strs(str, &head); - if (rc != 0) - goto out; - - l = head; - while (l != NULL) { - l = l->lstr_next; - c++; - } - - *idspp = malloc(c * sizeof(lnet_process_id_t)); - if (*idspp == NULL) { - fprintf(stderr, "Out of memory\n"); - rc = -1; - } - - *countp = c; -out: - i = 0; - while ((l = head) != NULL) { - head = l->lstr_next; - - if (rc == 0) { - (*idspp)[i].nid = libcfs_str2nid(l->lstr_str); - if ((*idspp)[i].nid == LNET_NID_ANY) { - 
fprintf(stderr, "Invalid nid: %s\n", - l->lstr_str); - rc = -1; - } - - (*idspp)[i].pid = LUSTRE_LNET_PID; - i++; - } - - free_lstr(l); - } - - if (rc == 0) - return 0; - - free(*idspp); - *idspp = NULL; - - return rc; -} - -char * -lst_node_state2str(int state) -{ - if (state == LST_NODE_ACTIVE) - return "Active"; - if (state == LST_NODE_BUSY) - return "Busy"; - if (state == LST_NODE_DOWN) - return "Down"; - - return "Unknown"; -} - -int -lst_node_str2state(char *str) -{ - if (strcasecmp(str, "active") == 0) - return LST_NODE_ACTIVE; - if (strcasecmp(str, "busy") == 0) - return LST_NODE_BUSY; - if (strcasecmp(str, "down") == 0) - return LST_NODE_DOWN; - if (strcasecmp(str, "unknown") == 0) - return LST_NODE_UNKNOWN; - if (strcasecmp(str, "invalid") == 0) - return (LST_NODE_UNKNOWN | LST_NODE_DOWN | LST_NODE_BUSY); - - return -1; -} - -char * -lst_test_type2name(int type) -{ - if (type == LST_TEST_PING) - return "ping"; - if (type == LST_TEST_BULK) - return "brw"; - - return "unknown"; -} - -int -lst_test_name2type(char *name) -{ - if (strcasecmp(name, "ping") == 0) - return LST_TEST_PING; - if (strcasecmp(name, "brw") == 0) - return LST_TEST_BULK; - - return -1; -} - -void -lst_print_usage(char *cmd) -{ - Parser_printhelp(cmd); -} - -void -lst_print_error(char *sub, const char *def_format, ...) 
-{ - va_list ap; - - /* local error returned from kernel */ - switch (errno) { - case ESRCH: - fprintf(stderr, "No session exists\n"); - return; - case ESHUTDOWN: - fprintf(stderr, "Session is shutting down\n"); - return; - case EACCES: - fprintf(stderr, "Unmatched session key or not root\n"); - return; - case ENOENT: - fprintf(stderr, "Can't find %s in current session\n", sub); - return; - case EINVAL: - fprintf(stderr, "Invalid parameters list in command line\n"); - return; - case EFAULT: - fprintf(stderr, "Bad parameter address\n"); - return; - case EEXIST: - fprintf(stderr, "%s already exists\n", sub); - return; - default: - va_start(ap, def_format); - vfprintf(stderr, def_format, ap); - va_end(ap); - - return; - } -} - -void -lst_free_rpcent(struct list_head *head) -{ - lstcon_rpc_ent_t *ent; - - while (!list_empty(head)) { - ent = list_entry(head->next, lstcon_rpc_ent_t, rpe_link); - - list_del(&ent->rpe_link); - free(ent); - } -} - -void -lst_reset_rpcent(struct list_head *head) -{ - lstcon_rpc_ent_t *ent; - - list_for_each_entry(ent, head, rpe_link) { - ent->rpe_sid = LST_INVALID_SID; - ent->rpe_peer.nid = LNET_NID_ANY; - ent->rpe_peer.pid = LNET_PID_ANY; - ent->rpe_rpc_errno = ent->rpe_fwk_errno = 0; - } -} - -int -lst_alloc_rpcent(struct list_head *head, int count, int offset) -{ - lstcon_rpc_ent_t *ent; - int i; - - for (i = 0; i < count; i++) { - ent = malloc(offsetof(lstcon_rpc_ent_t, rpe_payload[offset])); - if (ent == NULL) { - lst_free_rpcent(head); - return -1; - } - - memset(ent, 0, offsetof(lstcon_rpc_ent_t, rpe_payload[offset])); - - ent->rpe_sid = LST_INVALID_SID; - ent->rpe_peer.nid = LNET_NID_ANY; - ent->rpe_peer.pid = LNET_PID_ANY; - list_add(&ent->rpe_link, head); - } - - return 0; -} - -void -lst_print_transerr(struct list_head *head, char *optstr) -{ - lstcon_rpc_ent_t *ent; - - list_for_each_entry(ent, head, rpe_link) { - if (ent->rpe_rpc_errno == 0 && ent->rpe_fwk_errno == 0) - continue; - - if (ent->rpe_rpc_errno != 0) { - 
fprintf(stderr, "%s RPC failed on %s: %s\n", - optstr, libcfs_id2str(ent->rpe_peer), - strerror(ent->rpe_rpc_errno)); - continue; - } - - fprintf(stderr, "%s failed on %s: %s\n", - optstr, libcfs_id2str(ent->rpe_peer), - strerror(ent->rpe_fwk_errno)); - } -} - -int lst_info_batch_ioctl(char *batch, int test, int server, - lstcon_test_batch_ent_t *entp, int *idxp, - int *ndentp, lstcon_node_ent_t *dentsp); - -int lst_info_group_ioctl(char *name, lstcon_ndlist_ent_t *gent, - int *idx, int *count, lstcon_node_ent_t *dents); - -int lst_query_batch_ioctl(char *batch, int test, int server, - int timeout, struct list_head *head); - -int -lst_ioctl(unsigned int opc, void *buf, int len) -{ - struct libcfs_ioctl_data data; - int rc; - - LIBCFS_IOC_INIT (data); - data.ioc_u32[0] = opc; - data.ioc_plen1 = len; - data.ioc_pbuf1 = (char *)buf; - data.ioc_plen2 = sizeof(trans_stat); - data.ioc_pbuf2 = (char *)&trans_stat; - - memset(&trans_stat, 0, sizeof(trans_stat)); - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNETST, &data); - - /* local error, no valid RPC result */ - if (rc != 0) - return -1; - - /* RPC error */ - if (trans_stat.trs_rpc_errno != 0) - return -2; - - /* Framework error */ - if (trans_stat.trs_fwk_errno != 0) - return -3; - - return 0; -} - -int -lst_new_session_ioctl (char *name, int timeout, int force, lst_sid_t *sid) -{ - lstio_session_new_args_t args = { - .lstio_ses_key = session_key, - .lstio_ses_timeout = timeout, - .lstio_ses_force = force, - .lstio_ses_idp = sid, - .lstio_ses_namep = name, - .lstio_ses_nmlen = strlen(name), - }; - - return lst_ioctl (LSTIO_SESSION_NEW, &args, sizeof(args)); -} - -int -jt_lst_new_session(int argc, char **argv) -{ - char buf[LST_NAME_SIZE]; - char *name; - int optidx = 0; - int timeout = 300; - int force = 0; - int c; - int rc; - - static struct option session_opts[] = - { - {"timeout", required_argument, 0, 't' }, - {"force", no_argument, 0, 'f' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't 
find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - - c = getopt_long(argc, argv, "ft:", - session_opts, &optidx); - - if (c == -1) - break; - - switch (c) { - case 'f': - force = 1; - break; - case 't': - timeout = atoi(optarg); - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (timeout <= 0) { - fprintf(stderr, "Invalid timeout value\n"); - return -1; - } - - if (optind == argc - 1) { - name = argv[optind ++]; - if (strlen(name) >= LST_NAME_SIZE) { - fprintf(stderr, "Name size is limited to %d\n", - LST_NAME_SIZE - 1); - return -1; - } - - } else if (optind == argc) { - char user[LST_NAME_SIZE]; - char host[LST_NAME_SIZE]; - struct passwd *pw = getpwuid(getuid()); - - if (pw == NULL) - snprintf(user, sizeof(user), "%d", (int)getuid()); - else - snprintf(user, sizeof(user), "%s", pw->pw_name); - - rc = gethostname(host, sizeof(host)); - if (rc != 0) - snprintf(host, sizeof(host), "unknown_host"); - - snprintf(buf, LST_NAME_SIZE, "%s@%s", user, host); - name = buf; - - } else { - lst_print_usage(argv[0]); - return -1; - } - - rc = lst_new_session_ioctl(name, timeout, force, &session_id); - - if (rc != 0) { - lst_print_error("session", "Failed to create session: %s\n", - strerror(errno)); - return rc; - } - - fprintf(stdout, "SESSION: %s TIMEOUT: %d FORCE: %s\n", - name, timeout, force ? 
"Yes": "No"); - - return rc; -} - -int -lst_session_info_ioctl(char *name, int len, int *key, - lst_sid_t *sid, lstcon_ndlist_ent_t *ndinfo) -{ - lstio_session_info_args_t args = { - .lstio_ses_keyp = key, - .lstio_ses_idp = sid, - .lstio_ses_ndinfo = ndinfo, - .lstio_ses_nmlen = len, - .lstio_ses_namep = name, - }; - - return lst_ioctl(LSTIO_SESSION_INFO, &args, sizeof(args)); -} - -int -jt_lst_show_session(int argc, char **argv) -{ - lstcon_ndlist_ent_t ndinfo; - lst_sid_t sid; - char name[LST_NAME_SIZE]; - int key; - int rc; - - rc = lst_session_info_ioctl(name, LST_NAME_SIZE, &key, &sid, &ndinfo); - - if (rc != 0) { - lst_print_error("session", "Failed to show session: %s\n", - strerror(errno)); - return -1; - } - - fprintf(stdout, "%s ID: %Lu@%s, KEY: %d NODES: %d\n", - name, sid.ses_stamp, libcfs_nid2str(sid.ses_nid), - key, ndinfo.nle_nnode); - - return 0; -} - -int -lst_end_session_ioctl(void) -{ - lstio_session_end_args_t args = { - .lstio_ses_key = session_key, - }; - - return lst_ioctl (LSTIO_SESSION_END, &args, sizeof(args)); -} - -int -jt_lst_end_session(int argc, char **argv) -{ - int rc; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - rc = lst_end_session_ioctl(); - - if (rc == 0) { - fprintf(stdout, "session is ended\n"); - return 0; - } - - if (rc == -1) { - lst_print_error("session", "Failed to end session: %s\n", - strerror(errno)); - return rc; - } - - if (trans_stat.trs_rpc_errno != 0) { - fprintf(stderr, - "[RPC] Failed to send %d session RPCs: %s\n", - lstcon_rpc_stat_failure(&trans_stat, 0), - strerror(trans_stat.trs_rpc_errno)); - } - - if (trans_stat.trs_fwk_errno != 0) { - fprintf(stderr, - "[FWK] Failed to end session on %d nodes: %s\n", - lstcon_sesop_stat_failure(&trans_stat, 0), - strerror(trans_stat.trs_fwk_errno)); - } - - return rc; -} - -int -lst_ping_ioctl(char *str, int type, int timeout, - int count, lnet_process_id_t *ids, struct list_head *head) -{ - 
lstio_debug_args_t args = { - .lstio_dbg_key = session_key, - .lstio_dbg_type = type, - .lstio_dbg_flags = 0, - .lstio_dbg_timeout = timeout, - .lstio_dbg_nmlen = (str == NULL) ? 0: strlen(str), - .lstio_dbg_namep = str, - .lstio_dbg_count = count, - .lstio_dbg_idsp = ids, - .lstio_dbg_resultp = head, - }; - - return lst_ioctl (LSTIO_DEBUG, &args, sizeof(args)); -} - -int -lst_get_node_count(int type, char *str, int *countp, lnet_process_id_t **idspp) -{ - char buf[LST_NAME_SIZE]; - lstcon_test_batch_ent_t ent; - lstcon_ndlist_ent_t *entp = &ent.tbe_cli_nle; - lst_sid_t sid; - int key; - int rc; - - switch (type) { - case LST_OPC_SESSION: - rc = lst_session_info_ioctl(buf, LST_NAME_SIZE, - &key, &sid, entp); - break; - - case LST_OPC_BATCHSRV: - entp = &ent.tbe_srv_nle; - case LST_OPC_BATCHCLI: - rc = lst_info_batch_ioctl(str, 0, 0, &ent, NULL, NULL, NULL); - break; - - case LST_OPC_GROUP: - rc = lst_info_group_ioctl(str, entp, NULL, NULL, NULL); - break; - - case LST_OPC_NODES: - rc = lst_parse_nids(str, &entp->nle_nnode, idspp) < 0 ? 
-1 : 0; - break; - - default: - rc = -1; - break; - } - - if (rc == 0) - *countp = entp->nle_nnode; - - return rc; -} - -int -jt_lst_ping(int argc, char **argv) -{ - struct list_head head; - lnet_process_id_t *ids = NULL; - lstcon_rpc_ent_t *ent = NULL; - char *str = NULL; - int optidx = 0; - int server = 0; - int timeout = 5; - int count = 0; - int type = 0; - int rc = 0; - int c; - - static struct option ping_opts[] = - { - {"session", no_argument, 0, 's' }, - {"server", no_argument, 0, 'v' }, - {"batch", required_argument, 0, 'b' }, - {"group", required_argument, 0, 'g' }, - {"nodes", required_argument, 0, 'n' }, - {"timeout", required_argument, 0, 't' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - - c = getopt_long(argc, argv, "g:b:n:t:sv", - ping_opts, &optidx); - - if (c == -1) - break; - - switch (c) { - case 's': - type = LST_OPC_SESSION; - break; - - case 'g': - type = LST_OPC_GROUP; - str = optarg; - break; - - case 'b': - type = LST_OPC_BATCHCLI; - str = optarg; - break; - - case 'n': - type = LST_OPC_NODES; - str = optarg; - break; - - case 't': - timeout = atoi(optarg); - break; - - case 'v': - server = 1; - break; - - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (type == 0 || timeout <= 0 || optind != argc) { - lst_print_usage(argv[0]); - return -1; - } - - if (type == LST_OPC_BATCHCLI && server) - type = LST_OPC_BATCHSRV; - - rc = lst_get_node_count(type, str, &count, &ids); - if (rc < 0) { - fprintf(stderr, "Failed to get count of nodes from %s: %s\n", - (str == NULL) ? "session" : str, strerror(errno)); - return -1; - } - - CFS_INIT_LIST_HEAD(&head); - - rc = lst_alloc_rpcent(&head, count, LST_NAME_SIZE); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - goto out; - } - - if (count == 0) { - fprintf(stdout, "Target %s is empty\n", - (str == NULL) ? 
"session" : str); - goto out; - } - - rc = lst_ping_ioctl(str, type, timeout, count, ids, &head); - if (rc == -1) { /* local failure */ - lst_print_error("debug", "Failed to ping %s: %s\n", - (str == NULL) ? "session" : str, - strerror(errno)); - rc = -1; - goto out; - } - - /* ignore RPC errors and framwork errors */ - list_for_each_entry(ent, &head, rpe_link) { - fprintf(stdout, "\t%s: %s [session: %s id: %s]\n", - libcfs_id2str(ent->rpe_peer), - lst_node_state2str(ent->rpe_state), - (ent->rpe_state == LST_NODE_ACTIVE || - ent->rpe_state == LST_NODE_BUSY)? - (ent->rpe_rpc_errno == 0 ? - &ent->rpe_payload[0] : "Unknown") : - "<NULL>", libcfs_nid2str(ent->rpe_sid.ses_nid)); - } - -out: - lst_free_rpcent(&head); - - if (ids != NULL) - free(ids); - - return rc; - -} - -int -lst_add_nodes_ioctl (char *name, int count, lnet_process_id_t *ids, - struct list_head *resultp) -{ - lstio_group_nodes_args_t args = { - .lstio_grp_key = session_key, - .lstio_grp_nmlen = strlen(name), - .lstio_grp_namep = name, - .lstio_grp_count = count, - .lstio_grp_idsp = ids, - .lstio_grp_resultp = resultp, - }; - - return lst_ioctl(LSTIO_NODES_ADD, &args, sizeof(args)); -} - -int -lst_add_group_ioctl (char *name) -{ - lstio_group_add_args_t args = { - .lstio_grp_key = session_key, - .lstio_grp_nmlen = strlen(name), - .lstio_grp_namep = name, - }; - - return lst_ioctl(LSTIO_GROUP_ADD, &args, sizeof(args)); -} - -int -jt_lst_add_group(int argc, char **argv) -{ - struct list_head head; - lnet_process_id_t *ids; - char *name; - int count; - int rc; - int i; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - if (argc < 3) { - lst_print_usage(argv[0]); - return -1; - } - - name = argv[1]; - if (strlen(name) >= LST_NAME_SIZE) { - fprintf(stderr, "Name length is limited to %d\n", - LST_NAME_SIZE - 1); - return -1; - } - - rc = lst_add_group_ioctl(name); - if (rc != 0) { - lst_print_error("group", "Failed to add group %s: 
%s\n", - name, strerror(errno)); - return -1; - } - - CFS_INIT_LIST_HEAD(&head); - - for (i = 2; i < argc; i++) { - /* parse address list */ - rc = lst_parse_nids(argv[i], &count, &ids); - if (rc < 0) { - fprintf(stderr, "Ignore invalid id list %s\n", - argv[i]); - continue; - } - - if (count == 0) - continue; - - rc = lst_alloc_rpcent(&head, count, 0); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - break; - } - - rc = lst_add_nodes_ioctl(name, count, ids, &head); - - free(ids); - - if (rc == 0) { - lst_free_rpcent(&head); - fprintf(stderr, "%s are added to session\n", argv[i]); - continue; - } - - if (rc == -1) { - lst_free_rpcent(&head); - lst_print_error("group", "Failed to add nodes %s: %s\n", - argv[i], strerror(errno)); - break; - } - - lst_print_transerr(&head, "create session"); - lst_free_rpcent(&head); - } - - return rc; -} - -int -lst_del_group_ioctl (char *name) -{ - lstio_group_del_args_t args = { - .lstio_grp_key = session_key, - .lstio_grp_nmlen = strlen(name), - .lstio_grp_namep = name, - }; - - return lst_ioctl(LSTIO_GROUP_DEL, &args, sizeof(args)); -} - -int -jt_lst_del_group(int argc, char **argv) -{ - int rc; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - if (argc != 2) { - lst_print_usage(argv[0]); - return -1; - } - - rc = lst_del_group_ioctl(argv[1]); - if (rc == 0) { - fprintf(stdout, "Group is deleted\n"); - return 0; - } - - if (rc == -1) { - lst_print_error("group", "Failed to delete group: %s\n", - strerror(errno)); - return rc; - } - - fprintf(stderr, "Group is deleted with some errors\n"); - - if (trans_stat.trs_rpc_errno != 0) { - fprintf(stderr, "[RPC] Failed to send %d end session RPCs: %s\n", - lstcon_rpc_stat_failure(&trans_stat, 0), - strerror(trans_stat.trs_rpc_errno)); - } - - if (trans_stat.trs_fwk_errno != 0) { - fprintf(stderr, - "[FWK] Failed to end session on %d nodes: %s\n", - lstcon_sesop_stat_failure(&trans_stat, 0), - 
strerror(trans_stat.trs_fwk_errno)); - } - - return -1; -} - -int -lst_update_group_ioctl(int opc, char *name, int clean, int count, - lnet_process_id_t *ids, struct list_head *resultp) -{ - lstio_group_update_args_t args = { - .lstio_grp_key = session_key, - .lstio_grp_opc = opc, - .lstio_grp_args = clean, - .lstio_grp_nmlen = strlen(name), - .lstio_grp_namep = name, - .lstio_grp_count = count, - .lstio_grp_idsp = ids, - .lstio_grp_resultp = resultp, - }; - - return lst_ioctl(LSTIO_GROUP_UPDATE, &args, sizeof(args)); -} - -int -jt_lst_update_group(int argc, char **argv) -{ - struct list_head head; - lnet_process_id_t *ids = NULL; - char *str = NULL; - char *grp = NULL; - int optidx = 0; - int count = 0; - int clean = 0; - int opc = 0; - int rc; - int c; - - static struct option update_group_opts[] = - { - {"refresh", no_argument, 0, 'f' }, - {"clean", required_argument, 0, 'c' }, - {"remove", required_argument, 0, 'r' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "fc:r:", - update_group_opts, &optidx); - - /* Detect the end of the options. */ - if (c == -1) - break; - - switch (c) { - case 'f': - if (opc != 0) { - lst_print_usage(argv[0]); - return -1; - } - opc = LST_GROUP_REFRESH; - break; - - case 'r': - if (opc != 0) { - lst_print_usage(argv[0]); - return -1; - } - opc = LST_GROUP_RMND; - str = optarg; - break; - - case 'c': - clean = lst_node_str2state(optarg); - if (opc != 0 || clean <= 0) { - lst_print_usage(argv[0]); - return -1; - } - opc = LST_GROUP_CLEAN; - break; - - default: - lst_print_usage(argv[0]); - return -1; - } - } - - /* no OPC or group is specified */ - if (opc == 0 || optind != argc - 1) { - lst_print_usage(argv[0]); - return -1; - } - - grp = argv[optind]; - - CFS_INIT_LIST_HEAD(&head); - - if (opc == LST_GROUP_RMND || opc == LST_GROUP_REFRESH) { - rc = lst_get_node_count(opc == LST_GROUP_RMND ? 
LST_OPC_NODES : - LST_OPC_GROUP, - opc == LST_GROUP_RMND ? str : grp, - &count, &ids); - - if (rc != 0) { - fprintf(stderr, "Can't get count of nodes from %s: %s\n", - opc == LST_GROUP_RMND ? str : grp, - strerror(errno)); - return -1; - } - - rc = lst_alloc_rpcent(&head, count, 0); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - free(ids); - return -1; - } - - } - - rc = lst_update_group_ioctl(opc, grp, clean, count, ids, &head); - - if (ids != NULL) - free(ids); - - if (rc == 0) { - lst_free_rpcent(&head); - return 0; - } - - if (rc == -1) { - lst_free_rpcent(&head); - lst_print_error("group", "Failed to update group: %s\n", - strerror(errno)); - return rc; - } - - lst_print_transerr(&head, "Updating group"); - - lst_free_rpcent(&head); - - return rc; -} - -int -lst_list_group_ioctl(int len, char *name, int idx) -{ - lstio_group_list_args_t args = { - .lstio_grp_key = session_key, - .lstio_grp_idx = idx, - .lstio_grp_nmlen = len, - .lstio_grp_namep = name, - }; - - return lst_ioctl(LSTIO_GROUP_LIST, &args, sizeof(args)); -} - -int -lst_info_group_ioctl(char *name, lstcon_ndlist_ent_t *gent, - int *idx, int *count, lstcon_node_ent_t *dents) -{ - lstio_group_info_args_t args = { - .lstio_grp_key = session_key, - .lstio_grp_nmlen = strlen(name), - .lstio_grp_namep = name, - .lstio_grp_entp = gent, - .lstio_grp_idxp = idx, - .lstio_grp_ndentp = count, - .lstio_grp_dentsp = dents, - }; - - return lst_ioctl(LSTIO_GROUP_INFO, &args, sizeof(args)); -} - -int -lst_list_group_all(void) -{ - char name[LST_NAME_SIZE]; - int rc; - int i; - - /* no group is specified, list name of all groups */ - for (i = 0; ; i++) { - rc = lst_list_group_ioctl(LST_NAME_SIZE, name, i); - if (rc == 0) { - fprintf(stdout, "%d) %s\n", i + 1, name); - continue; - } - - if (errno == ENOENT) - break; - - lst_print_error("group", "Failed to list group: %s\n", - strerror(errno)); - return -1; - } - - fprintf(stdout, "Total %d groups\n", i); - - return 0; -} - -#define LST_NODES_TITLE 
"\tACTIVE\tBUSY\tDOWN\tUNKNOWN\tTOTAL\n" - -int -jt_lst_list_group(int argc, char **argv) -{ - lstcon_ndlist_ent_t gent; - lstcon_node_ent_t *dents; - int optidx = 0; - int verbose = 0; - int active = 0; - int busy = 0; - int down = 0; - int unknown = 0; - int all = 0; - int count; - int index; - int i; - int j; - int c; - int rc = 0; - - static struct option list_group_opts[] = - { - {"active", no_argument, 0, 'a' }, - {"busy", no_argument, 0, 'b' }, - {"down", no_argument, 0, 'd' }, - {"unknown", no_argument, 0, 'u' }, - {"all", no_argument, 0, 'l' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "abdul", - list_group_opts, &optidx); - - if (c == -1) - break; - - switch (c) { - case 'a': - verbose = active = 1; - all = 0; - break; - case 'b': - verbose = busy = 1; - all = 0; - break; - case 'd': - verbose = down = 1; - all = 0; - break; - case 'u': - verbose = unknown = 1; - all = 0; - break; - case 'l': - verbose = all = 1; - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (optind == argc) { - /* no group is specified, list name of all groups */ - rc = lst_list_group_all(); - - return rc; - } - - if (!verbose) - fprintf(stdout, LST_NODES_TITLE); - - /* list nodes in specified groups */ - for (i = optind; i < argc; i++) { - rc = lst_info_group_ioctl(argv[i], &gent, NULL, NULL, NULL); - if (rc != 0) { - if (errno == ENOENT) { - rc = 0; - break; - } - - lst_print_error("group", "Failed to list group\n", - strerror(errno)); - break; - } - - if (!verbose) { - fprintf(stdout, "\t%d\t%d\t%d\t%d\t%d\t%s\n", - gent.nle_nactive, gent.nle_nbusy, - gent.nle_ndown, gent.nle_nunknown, - gent.nle_nnode, argv[i]); - continue; - } - - fprintf(stdout, "Group [ %s ]\n", argv[i]); - - if (gent.nle_nnode == 0) { - fprintf(stdout, "No nodes found [ %s ]\n", argv[i]); - continue; - } - - count = gent.nle_nnode; - - dents 
= malloc(count * sizeof(lstcon_node_ent_t)); - if (dents == NULL) { - fprintf(stderr, "Failed to malloc: %s\n", - strerror(errno)); - return -1; - } - - index = 0; - rc = lst_info_group_ioctl(argv[i], &gent, &index, &count, dents); - if (rc != 0) { - lst_print_error("group", "Failed to list group: %s\n", - strerror(errno)); - free(dents); - return -1; - } - - for (j = 0, c = 0; j < count; j++) { - if (all || - ((active && dents[j].nde_state == LST_NODE_ACTIVE) || - (busy && dents[j].nde_state == LST_NODE_BUSY) || - (down && dents[j].nde_state == LST_NODE_DOWN) || - (unknown && dents[j].nde_state == LST_NODE_UNKNOWN))) { - - fprintf(stdout, "\t%s: %s\n", - libcfs_id2str(dents[j].nde_id), - lst_node_state2str(dents[j].nde_state)); - c++; - } - } - - fprintf(stdout, "Total %d nodes [ %s ]\n", c, argv[i]); - - free(dents); - } - - return rc; -} - -int -lst_stat_ioctl (char *name, int count, lnet_process_id_t *idsp, - int timeout, struct list_head *resultp) -{ - lstio_stat_args_t args = { - .lstio_sta_key = session_key, - .lstio_sta_timeout = timeout, - .lstio_sta_nmlen = strlen(name), - .lstio_sta_namep = name, - .lstio_sta_count = count, - .lstio_sta_idsp = idsp, - .lstio_sta_resultp = resultp, - }; - - return lst_ioctl (LSTIO_STAT_QUERY, &args, sizeof(args)); -} - -typedef struct { - struct list_head srp_link; - int srp_count; - char *srp_name; - lnet_process_id_t *srp_ids; - struct list_head srp_result[2]; -} lst_stat_req_param_t; - -static void -lst_stat_req_param_free(lst_stat_req_param_t *srp) -{ - int i; - - for (i = 0; i < 2; i++) - lst_free_rpcent(&srp->srp_result[i]); - - if (srp->srp_ids != NULL) - free(srp->srp_ids); - - free(srp); -} - -static int -lst_stat_req_param_alloc(char *name, lst_stat_req_param_t **srpp, int save_old) -{ - lst_stat_req_param_t *srp = NULL; - int count = save_old ? 
2 : 1; - int rc; - int i; - - srp = malloc(sizeof(*srp)); - if (srp == NULL) - return -ENOMEM; - - memset(srp, 0, sizeof(*srp)); - CFS_INIT_LIST_HEAD(&srp->srp_result[0]); - CFS_INIT_LIST_HEAD(&srp->srp_result[1]); - - rc = lst_get_node_count(LST_OPC_GROUP, name, - &srp->srp_count, NULL); - if (rc != 0 && errno == ENOENT) { - rc = lst_get_node_count(LST_OPC_NODES, name, - &srp->srp_count, &srp->srp_ids); - } - - if (rc != 0) { - fprintf(stderr, - "Failed to get count of nodes from %s: %s\n", - name, strerror(errno)); - lst_stat_req_param_free(srp); - - return rc; - } - - srp->srp_name = name; - - for (i = 0; i < count; i++) { - rc = lst_alloc_rpcent(&srp->srp_result[i], srp->srp_count, - sizeof(sfw_counters_t) + - sizeof(srpc_counters_t) + - sizeof(lnet_counters_t)); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - break; - } - } - - if (rc == 0) { - *srpp = srp; - return 0; - } - - lst_stat_req_param_free(srp); - - return rc; -} - -typedef struct { - /* TODO */ -} lst_srpc_stat_result; - -#define LST_LNET_AVG 0 -#define LST_LNET_MIN 1 -#define LST_LNET_MAX 2 - -typedef struct { - float lnet_avg_sndrate; - float lnet_min_sndrate; - float lnet_max_sndrate; - float lnet_total_sndrate; - - float lnet_avg_rcvrate; - float lnet_min_rcvrate; - float lnet_max_rcvrate; - float lnet_total_rcvrate; - - float lnet_avg_sndperf; - float lnet_min_sndperf; - float lnet_max_sndperf; - float lnet_total_sndperf; - - float lnet_avg_rcvperf; - float lnet_min_rcvperf; - float lnet_max_rcvperf; - float lnet_total_rcvperf; - - int lnet_stat_count; -} lst_lnet_stat_result_t; - -lst_lnet_stat_result_t lnet_stat_result; - -static float -lst_lnet_stat_value(int bw, int send, int off) -{ - float *p; - - p = bw ? 
&lnet_stat_result.lnet_avg_sndperf : - &lnet_stat_result.lnet_avg_sndrate; - - if (!send) - p += 4; - - p += off; - - return *p; -} - -static void -lst_timeval_diff(struct timeval *tv1, - struct timeval *tv2, struct timeval *df) -{ - if (tv1->tv_usec >= tv2->tv_usec) { - df->tv_sec = tv1->tv_sec - tv2->tv_sec; - df->tv_usec = tv1->tv_usec - tv2->tv_usec; - return; - } - - df->tv_sec = tv1->tv_sec - 1 - tv2->tv_sec; - df->tv_usec = tv1->tv_sec + 1000000 - tv2->tv_usec; - - return; -} - -void -lst_cal_lnet_stat(float delta, lnet_counters_t *lnet_new, - lnet_counters_t *lnet_old) -{ - float perf; - float rate; - - perf = (float)(lnet_new->send_length - - lnet_old->send_length) / (1024 * 1024) / delta; - lnet_stat_result.lnet_total_sndperf += perf; - - if (lnet_stat_result.lnet_min_sndperf > perf || - lnet_stat_result.lnet_min_sndperf == 0) - lnet_stat_result.lnet_min_sndperf = perf; - - if (lnet_stat_result.lnet_max_sndperf < perf) - lnet_stat_result.lnet_max_sndperf = perf; - - perf = (float)(lnet_new->recv_length - - lnet_old->recv_length) / (1024 * 1024) / delta; - lnet_stat_result.lnet_total_rcvperf += perf; - - if (lnet_stat_result.lnet_min_rcvperf > perf || - lnet_stat_result.lnet_min_rcvperf == 0) - lnet_stat_result.lnet_min_rcvperf = perf; - - if (lnet_stat_result.lnet_max_rcvperf < perf) - lnet_stat_result.lnet_max_rcvperf = perf; - - rate = (lnet_new->send_count - lnet_old->send_count) / delta; - lnet_stat_result.lnet_total_sndrate += rate; - - if (lnet_stat_result.lnet_min_sndrate > rate || - lnet_stat_result.lnet_min_sndrate == 0) - lnet_stat_result.lnet_min_sndrate = rate; - - if (lnet_stat_result.lnet_max_sndrate < rate) - lnet_stat_result.lnet_max_sndrate = rate; - - rate = (lnet_new->recv_count - lnet_old->recv_count) / delta; - lnet_stat_result.lnet_total_rcvrate += rate; - - if (lnet_stat_result.lnet_min_rcvrate > rate || - lnet_stat_result.lnet_min_rcvrate == 0) - lnet_stat_result.lnet_min_rcvrate = rate; - - if (lnet_stat_result.lnet_max_rcvrate < 
rate) - lnet_stat_result.lnet_max_rcvrate = rate; - - lnet_stat_result.lnet_stat_count ++; - - lnet_stat_result.lnet_avg_sndrate = lnet_stat_result.lnet_total_sndrate / - lnet_stat_result.lnet_stat_count; - lnet_stat_result.lnet_avg_rcvrate = lnet_stat_result.lnet_total_rcvrate / - lnet_stat_result.lnet_stat_count; - - lnet_stat_result.lnet_avg_sndperf = lnet_stat_result.lnet_total_sndperf / - lnet_stat_result.lnet_stat_count; - lnet_stat_result.lnet_avg_rcvperf = lnet_stat_result.lnet_total_rcvperf / - lnet_stat_result.lnet_stat_count; - -} - -void -lst_print_lnet_stat(char *name, int bwrt, int rdwr, int type) -{ - int start1 = 0; - int end1 = 1; - int start2 = 0; - int end2 = 1; - int i; - int j; - - if (lnet_stat_result.lnet_stat_count == 0) - return; - - if (bwrt == 1) /* bw only */ - start1 = 1; - - if (bwrt == 2) /* rates only */ - end1 = 0; - - if (rdwr == 1) /* recv only */ - start2 = 1; - - if (rdwr == 2) /* send only */ - end2 = 0; - - for (i = start1; i <= end1; i++) { - fprintf(stdout, "[LNet %s of %s]\n", - i == 0 ? "Rates" : "Bandwidth", name); - - for (j = start2; j <= end2; j++) { - fprintf(stdout, "[%c] ", j == 0 ? 'R' : 'W'); - - if ((type & 1) != 0) { - fprintf(stdout, i == 0 ? "Avg: %-8.0f RPC/s " : - "Avg: %-8.2f MB/s ", - lst_lnet_stat_value(i, j, 0)); - } - - if ((type & 2) != 0) { - fprintf(stdout, i == 0 ? "Min: %-8.0f RPC/s " : - "Min: %-8.2f MB/s ", - lst_lnet_stat_value(i, j, 1)); - } - - if ((type & 4) != 0) { - fprintf(stdout, i == 0 ? 
"Max: %-8.0f RPC/s" : - "Max: %-8.2f MB/s", - lst_lnet_stat_value(i, j, 2)); - } - - fprintf(stdout, "\n"); - } - } -} - -void -lst_print_stat(char *name, struct list_head *resultp, - int idx, int lnet, int bwrt, int rdwr, int type) -{ - struct list_head tmp[2]; - lstcon_rpc_ent_t *new; - lstcon_rpc_ent_t *old; - sfw_counters_t *sfwk_new; - sfw_counters_t *sfwk_old; - srpc_counters_t *srpc_new; - srpc_counters_t *srpc_old; - lnet_counters_t *lnet_new; - lnet_counters_t *lnet_old; - struct timeval tv; - float delta; - int errcount = 0; - - CFS_INIT_LIST_HEAD(&tmp[0]); - CFS_INIT_LIST_HEAD(&tmp[1]); - - memset(&lnet_stat_result, 0, sizeof(lnet_stat_result)); - - while (!list_empty(&resultp[idx])) { - if (list_empty(&resultp[1 - idx])) { - fprintf(stderr, "Group is changed, re-run stat\n"); - break; - } - - new = list_entry(resultp[idx].next, lstcon_rpc_ent_t, rpe_link); - old = list_entry(resultp[1 - idx].next, lstcon_rpc_ent_t, rpe_link); - - /* first time get stats result, can't calculate diff */ - if (new->rpe_peer.nid == LNET_NID_ANY) - break; - - if (new->rpe_peer.nid != old->rpe_peer.nid || - new->rpe_peer.pid != old->rpe_peer.pid) { - /* Something wrong. 
i.e, somebody change the group */ - break; - } - - list_del(&new->rpe_link); - list_add_tail(&new->rpe_link, &tmp[idx]); - - list_del(&old->rpe_link); - list_add_tail(&old->rpe_link, &tmp[1 - idx]); - - if (new->rpe_rpc_errno != 0 || new->rpe_fwk_errno != 0 || - old->rpe_rpc_errno != 0 || old->rpe_fwk_errno != 0) { - errcount ++; - continue; - } - - sfwk_new = (sfw_counters_t *)&new->rpe_payload[0]; - sfwk_old = (sfw_counters_t *)&old->rpe_payload[0]; - - srpc_new = (srpc_counters_t *)((char *)sfwk_new + sizeof(*sfwk_new)); - srpc_old = (srpc_counters_t *)((char *)sfwk_old + sizeof(*sfwk_old)); - - lnet_new = (lnet_counters_t *)((char *)srpc_new + sizeof(*srpc_new)); - lnet_old = (lnet_counters_t *)((char *)srpc_old + sizeof(*srpc_old)); - - lst_timeval_diff(&new->rpe_stamp, &old->rpe_stamp, &tv); - - delta = tv.tv_sec + (float)tv.tv_usec/1000000; - - if (!lnet) /* TODO */ - continue; - - lst_cal_lnet_stat(delta, lnet_new, lnet_old); - } - - list_splice(&tmp[idx], &resultp[idx]); - list_splice(&tmp[1 - idx], &resultp[1 - idx]); - - if (errcount > 0) - fprintf(stdout, "Failed to stat on %d nodes\n", errcount); - - if (!lnet) /* TODO */ - return; - - lst_print_lnet_stat(name, bwrt, rdwr, type); -} - -int -jt_lst_stat(int argc, char **argv) -{ - struct list_head head; - lst_stat_req_param_t *srp; - time_t last = 0; - int optidx = 0; - int timeout = 5; /* default timeout, 5 sec */ - int delay = 5; /* default delay, 5 sec */ - int lnet = 1; /* lnet stat by default */ - int bwrt = 0; - int rdwr = 0; - int type = -1; - int idx = 0; - int rc; - int c; - - static struct option stat_opts[] = - { - {"timeout", required_argument, 0, 't' }, - {"delay" , required_argument, 0, 'd' }, - {"lnet" , no_argument, 0, 'l' }, - {"rpc" , no_argument, 0, 'c' }, - {"bw" , no_argument, 0, 'b' }, - {"rate" , no_argument, 0, 'a' }, - {"read" , no_argument, 0, 'r' }, - {"write" , no_argument, 0, 'w' }, - {"avg" , no_argument, 0, 'g' }, - {"min" , no_argument, 0, 'n' }, - {"max" , no_argument, 
0, 'x' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "t:d:lcbarwgnx", stat_opts, &optidx); - - if (c == -1) - break; - - switch (c) { - case 't': - timeout = atoi(optarg); - break; - case 'd': - delay = atoi(optarg); - break; - case 'l': - lnet = 1; - break; - case 'c': - lnet = 0; - break; - case 'b': - bwrt |= 1; - break; - case 'a': - bwrt |= 2; - break; - case 'r': - rdwr |= 1; - break; - case 'w': - rdwr |= 2; - break; - case 'g': - if (type == -1) { - type = 1; - break; - } - type |= 1; - break; - case 'n': - if (type == -1) { - type = 2; - break; - } - type |= 2; - break; - case 'x': - if (type == -1) { - type = 4; - break; - } - type |= 4; - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (optind == argc) { - lst_print_usage(argv[0]); - return -1; - } - - if (timeout <= 0 || delay <= 0) { - fprintf(stderr, "Invalid timeout or delay value\n"); - return -1; - } - - CFS_INIT_LIST_HEAD(&head); - - while (optind < argc) { - rc = lst_stat_req_param_alloc(argv[optind++], &srp, 1); - if (rc != 0) - goto out; - - list_add_tail(&srp->srp_link, &head); - } - - while (1) { - time_t now = time(NULL); - - if (now - last < delay) { - sleep(delay - now + last); - time(&now); - } - - last = now; - - list_for_each_entry(srp, &head, srp_link) { - rc = lst_stat_ioctl(srp->srp_name, - srp->srp_count, srp->srp_ids, - timeout, &srp->srp_result[idx]); - if (rc == -1) { - lst_print_error("stat", "Failed to stat %s: %s\n", - srp->srp_name, strerror(errno)); - goto out; - } - - lst_print_stat(srp->srp_name, srp->srp_result, - idx, lnet, bwrt, rdwr, type); - - lst_reset_rpcent(&srp->srp_result[1 - idx]); - } - - idx = 1 - idx; - } - -out: - while (!list_empty(&head)) { - srp = list_entry(head.next, lst_stat_req_param_t, srp_link); - - list_del(&srp->srp_link); - lst_stat_req_param_free(srp); - } - - return rc; -} - 
-int -jt_lst_show_error(int argc, char **argv) -{ - struct list_head head; - lst_stat_req_param_t *srp; - lstcon_rpc_ent_t *ent; - sfw_counters_t *sfwk; - srpc_counters_t *srpc; - lnet_counters_t *lnet; - int show_rpc = 1; - int optidx = 0; - int rc = 0; - int ecount; - int c; - - static struct option show_error_opts[] = - { - {"session", no_argument, 0, 's' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "s", show_error_opts, &optidx); - - if (c == -1) - break; - - switch (c) { - case 's': - show_rpc = 0; - break; - - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (optind == argc) { - lst_print_usage(argv[0]); - return -1; - } - - CFS_INIT_LIST_HEAD(&head); - - while (optind < argc) { - rc = lst_stat_req_param_alloc(argv[optind++], &srp, 0); - if (rc != 0) - goto out; - - list_add_tail(&srp->srp_link, &head); - } - - list_for_each_entry(srp, &head, srp_link) { - rc = lst_stat_ioctl(srp->srp_name, srp->srp_count, - srp->srp_ids, 5, &srp->srp_result[0]); - - if (rc == -1) { - lst_print_error(srp->srp_name, "Failed to show errors of %s: %s\n", - srp->srp_name, strerror(errno)); - goto out; - } - - fprintf(stdout, "%s:\n", srp->srp_name); - - ecount = 0; - - list_for_each_entry(ent, &srp->srp_result[0], rpe_link) { - if (ent->rpe_rpc_errno != 0) { - ecount ++; - fprintf(stderr, "RPC failure, can't show error on %s\n", - libcfs_id2str(ent->rpe_peer)); - continue; - } - - if (ent->rpe_fwk_errno != 0) { - ecount ++; - fprintf(stderr, "Framework failure, can't show error on %s\n", - libcfs_id2str(ent->rpe_peer)); - continue; - } - - sfwk = (sfw_counters_t *)&ent->rpe_payload[0]; - srpc = (srpc_counters_t *)((char *)sfwk + sizeof(*sfwk)); - lnet = (lnet_counters_t *)((char *)srpc + sizeof(*srpc)); - - if (srpc->errors == 0 && - sfwk->brw_errors == 0 && sfwk->ping_errors == 0) - continue; - - if (!show_rpc && - 
sfwk->brw_errors == 0 && sfwk->ping_errors == 0) - continue; - - ecount ++; - - fprintf(stderr, "%s: [Session %d brw errors, %d ping errors]%c", - libcfs_id2str(ent->rpe_peer), - sfwk->brw_errors, sfwk->ping_errors, - show_rpc ? ' ' : '\n'); - - if (!show_rpc) - continue; - - fprintf(stderr, "[RPC: %d errors, %d dropped, %d expired]\n", - srpc->errors, srpc->rpcs_dropped, srpc->rpcs_expired); - } - - fprintf(stdout, "Total %d error nodes in %s\n", ecount, srp->srp_name); - } -out: - while (!list_empty(&head)) { - srp = list_entry(head.next, lst_stat_req_param_t, srp_link); - - list_del(&srp->srp_link); - lst_stat_req_param_free(srp); - } - - return rc; -} - -int -lst_add_batch_ioctl (char *name) -{ - lstio_batch_add_args_t args = { - .lstio_bat_key = session_key, - .lstio_bat_nmlen = strlen(name), - .lstio_bat_namep = name, - }; - - return lst_ioctl (LSTIO_BATCH_ADD, &args, sizeof(args)); -} - -int -jt_lst_add_batch(int argc, char **argv) -{ - char *name; - int rc; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - if (argc != 2) { - lst_print_usage(argv[0]); - return -1; - } - - name = argv[1]; - if (strlen(name) >= LST_NAME_SIZE) { - fprintf(stderr, "Name length is limited to %d\n", - LST_NAME_SIZE - 1); - return -1; - } - - rc = lst_add_batch_ioctl(name); - if (rc == 0) - return 0; - - lst_print_error("batch", "Failed to create batch: %s\n", - strerror(errno)); - - return -1; -} - -int -lst_start_batch_ioctl (char *name, int timeout, struct list_head *resultp) -{ - lstio_batch_run_args_t args = { - .lstio_bat_key = session_key, - .lstio_bat_timeout = timeout, - .lstio_bat_nmlen = strlen(name), - .lstio_bat_namep = name, - .lstio_bat_resultp = resultp, - }; - - return lst_ioctl(LSTIO_BATCH_START, &args, sizeof(args)); -} - -int -jt_lst_start_batch(int argc, char **argv) -{ - struct list_head head; - char *batch; - int optidx = 0; - int timeout = 0; - int count = 0; - int rc; - int c; - - 
static struct option start_batch_opts[] = - { - {"timeout", required_argument, 0, 't' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "t:", - start_batch_opts, &optidx); - - /* Detect the end of the options. */ - if (c == -1) - break; - - switch (c) { - case 't': - timeout = atoi(optarg); - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (optind == argc) { - batch = LST_DEFAULT_BATCH; - - } else if (optind == argc - 1) { - batch = argv[optind]; - - } else { - lst_print_usage(argv[0]); - return -1; - } - - rc = lst_get_node_count(LST_OPC_BATCHCLI, batch, &count, NULL); - if (rc != 0) { - fprintf(stderr, "Failed to get count of nodes from %s: %s\n", - batch, strerror(errno)); - return -1; - } - - CFS_INIT_LIST_HEAD(&head); - - rc = lst_alloc_rpcent(&head, count, 0); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - return -1; - } - - rc = lst_start_batch_ioctl(batch, timeout, &head); - - if (rc == 0) { - fprintf(stdout, "%s is running now\n", batch); - lst_free_rpcent(&head); - return 0; - } - - if (rc == -1) { - lst_print_error("batch", "Failed to start batch: %s\n", - strerror(errno)); - lst_free_rpcent(&head); - return rc; - } - - lst_print_transerr(&head, "Run batch"); - - lst_free_rpcent(&head); - - return rc; -} - -int -lst_stop_batch_ioctl(char *name, int force, struct list_head *resultp) -{ - lstio_batch_stop_args_t args = { - .lstio_bat_key = session_key, - .lstio_bat_force = force, - .lstio_bat_nmlen = strlen(name), - .lstio_bat_namep = name, - .lstio_bat_resultp = resultp, - }; - - return lst_ioctl(LSTIO_BATCH_STOP, &args, sizeof(args)); -} - -int -jt_lst_stop_batch(int argc, char **argv) -{ - struct list_head head; - char *batch; - int force = 0; - int optidx; - int count; - int rc; - int c; - - static struct option stop_batch_opts[] = - { - {"force", no_argument, 0, 'f' }, - {0, 0, 
0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "f", - stop_batch_opts, &optidx); - - /* Detect the end of the options. */ - if (c == -1) - break; - - switch (c) { - case 'f': - force = 1; - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (optind == argc) { - batch = LST_DEFAULT_BATCH; - - } else if (optind == argc - 1) { - batch = argv[optind]; - - } else { - lst_print_usage(argv[0]); - return -1; - } - - rc = lst_get_node_count(LST_OPC_BATCHCLI, batch, &count, NULL); - if (rc != 0) { - fprintf(stderr, "Failed to get count of nodes from %s: %s\n", - batch, strerror(errno)); - return -1; - } - - CFS_INIT_LIST_HEAD(&head); - - rc = lst_alloc_rpcent(&head, count, 0); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - return -1; - } - - rc = lst_stop_batch_ioctl(batch, force, &head); - if (rc != 0) - goto out; - - while (1) { - lst_reset_rpcent(&head); - - rc = lst_query_batch_ioctl(batch, 0, 0, 30, &head); - if (rc != 0) - goto out; - - if (lstcon_tsbqry_stat_run(&trans_stat, 0) == 0 && - lstcon_tsbqry_stat_failure(&trans_stat, 0) == 0) - break; - - fprintf(stdout, "%d batch in stopping\n", - lstcon_tsbqry_stat_run(&trans_stat, 0)); - sleep(1); - } - - fprintf(stdout, "Batch is stopped\n"); - lst_free_rpcent(&head); - - return 0; -out: - if (rc == -1) { - lst_print_error("batch", "Failed to stop batch: %s\n", - strerror(errno)); - lst_free_rpcent(&head); - return -1; - } - - lst_print_transerr(&head, "stop batch"); - - lst_free_rpcent(&head); - - return rc; -} - -int -lst_list_batch_ioctl(int len, char *name, int index) -{ - lstio_batch_list_args_t args = { - .lstio_bat_key = session_key, - .lstio_bat_idx = index, - .lstio_bat_nmlen = len, - .lstio_bat_namep = name, - }; - - return lst_ioctl(LSTIO_BATCH_LIST, &args, sizeof(args)); -} - -int -lst_info_batch_ioctl(char *batch, int test, int server, - 
lstcon_test_batch_ent_t *entp, int *idxp, - int *ndentp, lstcon_node_ent_t *dentsp) -{ - lstio_batch_info_args_t args = { - .lstio_bat_key = session_key, - .lstio_bat_nmlen = strlen(batch), - .lstio_bat_namep = batch, - .lstio_bat_server = server, - .lstio_bat_testidx = test, - .lstio_bat_entp = entp, - .lstio_bat_idxp = idxp, - .lstio_bat_ndentp = ndentp, - .lstio_bat_dentsp = dentsp, - }; - - return lst_ioctl(LSTIO_BATCH_INFO, &args, sizeof(args)); -} - -int -lst_list_batch_all(void) -{ - char name[LST_NAME_SIZE]; - int rc; - int i; - - for (i = 0; ; i++) { - rc = lst_list_batch_ioctl(LST_NAME_SIZE, name, i); - if (rc == 0) { - fprintf(stdout, "%d) %s\n", i + 1, name); - continue; - } - - if (errno == ENOENT) - break; - - lst_print_error("batch", "Failed to list batch: %s\n", - strerror(errno)); - return rc; - } - - fprintf(stdout, "Total %d batches\n", i); - - return 0; -} - -int -lst_list_tsb_nodes(char *batch, int test, int server, - int count, int active, int invalid) -{ - lstcon_node_ent_t *dents; - int index = 0; - int rc; - int c; - int i; - - if (count == 0) - return 0; - - /* verbose list, show nodes in batch or test */ - dents = malloc(count * sizeof(lstcon_node_ent_t)); - if (dents == NULL) { - fprintf(stdout, "Can't allocate memory\n"); - return -1; - } - - rc = lst_info_batch_ioctl(batch, test, server, - NULL, &index, &count, dents); - if (rc != 0) { - free(dents); - lst_print_error((test > 0) ? "test" : "batch", - (test > 0) ? 
"Failed to query test: %s\n" : - "Failed to query batch: %s\n", - strerror(errno)); - return -1; - } - - for (i = 0, c = 0; i < count; i++) { - if ((!active && dents[i].nde_state == LST_NODE_ACTIVE) || - (!invalid && (dents[i].nde_state == LST_NODE_BUSY || - dents[i].nde_state == LST_NODE_DOWN || - dents[i].nde_state == LST_NODE_UNKNOWN))) - continue; - - fprintf(stdout, "\t%s: %s\n", - libcfs_id2str(dents[i].nde_id), - lst_node_state2str(dents[i].nde_state)); - c++; - } - - fprintf(stdout, "Total %d nodes\n", c); - free(dents); - - return 0; -} - -int -jt_lst_list_batch(int argc, char **argv) -{ - lstcon_test_batch_ent_t ent; - char *batch = NULL; - int optidx = 0; - int verbose = 0; /* list nodes in batch or test */ - int invalid = 0; - int active = 0; - int server = 0; - int ntest = 0; - int test = 0; - int c = 0; - int rc; - - static struct option list_batch_opts[] = - { - {"test", required_argument, 0, 't' }, - {"invalid", no_argument, 0, 'i' }, - {"active", no_argument, 0, 'a' }, - {"all", no_argument, 0, 'l' }, - {"server", no_argument, 0, 's' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "ailst:", - list_batch_opts, &optidx); - - if (c == -1) - break; - - switch (c) { - case 'a': - verbose = active = 1; - break; - case 'i': - verbose = invalid = 1; - break; - case 'l': - verbose = active = invalid = 1; - break; - case 's': - server = 1; - break; - case 't': - test = atoi(optarg); - ntest = 1; - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (optind == argc) { - /* list all batches */ - rc = lst_list_batch_all(); - return rc; - } - - if (ntest == 1 && test <= 0) { - fprintf(stderr, "Invalid test id, test id starts from 1\n"); - return -1; - } - - if (optind != argc - 1) { - lst_print_usage(argv[0]); - return -1; - } - - batch = argv[optind]; - -loop: - /* show detail of specified batch or 
test */ - rc = lst_info_batch_ioctl(batch, test, server, - &ent, NULL, NULL, NULL); - if (rc != 0) { - lst_print_error((test > 0) ? "test" : "batch", - (test > 0) ? "Failed to query test: %s\n" : - "Failed to query batch: %s\n", - strerror(errno)); - return -1; - } - - if (verbose) { - /* list nodes in test or batch */ - rc = lst_list_tsb_nodes(batch, test, server, - server ? ent.tbe_srv_nle.nle_nnode : - ent.tbe_cli_nle.nle_nnode, - active, invalid); - return rc; - } - - /* only show number of hosts in batch or test */ - if (test == 0) { - fprintf(stdout, "Batch: %s Tests: %d State: %d\n", - batch, ent.u.tbe_batch.bae_ntest, - ent.u.tbe_batch.bae_state); - ntest = ent.u.tbe_batch.bae_ntest; - test = 1; /* starting from test 1 */ - - } else { - fprintf(stdout, - "\tTest %d(%s) (loop: %d, concurrency: %d)\n", - test, lst_test_type2name(ent.u.tbe_test.tse_type), - ent.u.tbe_test.tse_loop, - ent.u.tbe_test.tse_concur); - ntest --; - test ++; - } - - fprintf(stdout, LST_NODES_TITLE); - fprintf(stdout, "client\t%d\t%d\t%d\t%d\t%d\n" - "server\t%d\t%d\t%d\t%d\t%d\n", - ent.tbe_cli_nle.nle_nactive, - ent.tbe_cli_nle.nle_nbusy, - ent.tbe_cli_nle.nle_ndown, - ent.tbe_cli_nle.nle_nunknown, - ent.tbe_cli_nle.nle_nnode, - ent.tbe_srv_nle.nle_nactive, - ent.tbe_srv_nle.nle_nbusy, - ent.tbe_srv_nle.nle_ndown, - ent.tbe_srv_nle.nle_nunknown, - ent.tbe_srv_nle.nle_nnode); - - if (ntest != 0) - goto loop; - - return 0; -} - -int -lst_query_batch_ioctl(char *batch, int test, int server, - int timeout, struct list_head *head) -{ - lstio_batch_query_args_t args = { - .lstio_bat_key = session_key, - .lstio_bat_testidx = test, - .lstio_bat_client = !(server), - .lstio_bat_timeout = timeout, - .lstio_bat_nmlen = strlen(batch), - .lstio_bat_namep = batch, - .lstio_bat_resultp = head, - }; - - return lst_ioctl(LSTIO_BATCH_QUERY, &args, sizeof(args)); -} - -void -lst_print_tsb_verbose(struct list_head *head, - int active, int idle, int error) -{ - lstcon_rpc_ent_t *ent; - - 
list_for_each_entry(ent, head, rpe_link) { - if (ent->rpe_priv[0] == 0 && active) - continue; - - if (ent->rpe_priv[0] != 0 && idle) - continue; - - if (ent->rpe_fwk_errno == 0 && error) - continue; - - fprintf(stdout, "%s [%s]: %s\n", - libcfs_id2str(ent->rpe_peer), - lst_node_state2str(ent->rpe_state), - ent->rpe_rpc_errno != 0 ? - strerror(ent->rpe_rpc_errno) : - (ent->rpe_priv[0] > 0 ? "Running" : "Idle")); - } -} - -int -jt_lst_query_batch(int argc, char **argv) -{ - lstcon_test_batch_ent_t ent; - struct list_head head; - char *batch = NULL; - time_t last = 0; - int optidx = 0; - int verbose = 0; - int server = 0; - int timeout = 5; /* default 5 seconds */ - int delay = 5; /* default 5 seconds */ - int loop = 1; /* default 1 loop */ - int active = 0; - int error = 0; - int idle = 0; - int count = 0; - int test = 0; - int rc = 0; - int c = 0; - int i; - - static struct option query_batch_opts[] = - { - {"timeout", required_argument, 0, 'o' }, - {"delay", required_argument, 0, 'd' }, - {"loop", required_argument, 0, 'c' }, - {"test", required_argument, 0, 't' }, - {"server", no_argument, 0, 's' }, - {"active", no_argument, 0, 'a' }, - {"idle", no_argument, 0, 'i' }, - {"error", no_argument, 0, 'e' }, - {"all", no_argument, 0, 'l' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "o:d:c:t:saiel", - query_batch_opts, &optidx); - - /* Detect the end of the options. 
*/ - if (c == -1) - break; - - switch (c) { - case 'o': - timeout = atoi(optarg); - break; - case 'd': - delay = atoi(optarg); - break; - case 'c': - loop = atoi(optarg); - break; - case 't': - test = atoi(optarg); - break; - case 's': - server = 1; - break; - case 'a': - active = verbose = 1; - break; - case 'i': - idle = verbose = 1; - break; - case 'e': - error = verbose = 1; - break; - case 'l': - verbose = 1; - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (test < 0 || timeout <= 0 || delay <= 0 || loop <= 0) { - lst_print_usage(argv[0]); - return -1; - } - - if (optind == argc) { - batch = LST_DEFAULT_BATCH; - - } else if (optind == argc - 1) { - batch = argv[optind]; - - } else { - lst_print_usage(argv[0]); - return -1; - } - - - CFS_INIT_LIST_HEAD(&head); - - if (verbose) { - rc = lst_info_batch_ioctl(batch, test, server, - &ent, NULL, NULL, NULL); - if (rc != 0) { - fprintf(stderr, "Failed to query %s [%d]: %s\n", - batch, test, strerror(errno)); - return -1; - } - - count = server ? 
ent.tbe_srv_nle.nle_nnode : - ent.tbe_cli_nle.nle_nnode; - if (count == 0) { - fprintf(stdout, "Batch or test is empty\n"); - return 0; - } - } - - rc = lst_alloc_rpcent(&head, count, 0); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - return rc; - } - - for (i = 0; i < loop; i++) { - time_t now = time(NULL); - - if (now - last < delay) { - sleep(delay - now + last); - time(&now); - } - - last = now; - - rc = lst_query_batch_ioctl(batch, test, - server, timeout, &head); - if (rc == -1) { - fprintf(stderr, "Failed to query batch: %s\n", - strerror(errno)); - break; - } - - if (verbose) { - /* Verbose mode */ - lst_print_tsb_verbose(&head, active, idle, error); - continue; - } - - fprintf(stdout, "%s [%d] ", batch, test); - - if (lstcon_rpc_stat_failure(&trans_stat, 0) != 0) { - fprintf(stdout, "%d of %d nodes are unknown, ", - lstcon_rpc_stat_failure(&trans_stat, 0), - lstcon_rpc_stat_total(&trans_stat, 0)); - } - - if (lstcon_rpc_stat_failure(&trans_stat, 0) == 0 && - lstcon_tsbqry_stat_run(&trans_stat, 0) == 0 && - lstcon_tsbqry_stat_failure(&trans_stat, 0) == 0) { - fprintf(stdout, "is stopped\n"); - continue; - } - - if (lstcon_rpc_stat_failure(&trans_stat, 0) == 0 && - lstcon_tsbqry_stat_idle(&trans_stat, 0) == 0 && - lstcon_tsbqry_stat_failure(&trans_stat, 0) == 0) { - fprintf(stdout, "is running\n"); - continue; - } - - fprintf(stdout, "stopped: %d , running: %d, failed: %d\n", - lstcon_tsbqry_stat_idle(&trans_stat, 0), - lstcon_tsbqry_stat_run(&trans_stat, 0), - lstcon_tsbqry_stat_failure(&trans_stat, 0)); - } - - lst_free_rpcent(&head); - - return rc; -} - -int -lst_parse_distribute(char *dstr, int *dist, int *span) -{ - *dist = atoi(dstr); - if (*dist <= 0) - return -1; - - dstr = strchr(dstr, ':'); - if (dstr == NULL) - return -1; - - *span = atoi(dstr + 1); - if (*span <= 0) - return -1; - - return 0; -} - -int -lst_get_bulk_param(int argc, char **argv, lst_test_bulk_param_t *bulk) -{ - char *tok = NULL; - char *end = NULL; - int rc = 0; - int i 
= 0; - - bulk->blk_size = 4096; - bulk->blk_opc = LST_BRW_READ; - bulk->blk_flags = LST_BRW_CHECK_NONE; - - while (i < argc) { - if (strcasestr(argv[i], "check=") == argv[i] || - strcasestr(argv[i], "c=") == argv[i]) { - tok = strchr(argv[i], '=') + 1; - - if (strcasecmp(tok, "full") == 0) { - bulk->blk_flags = LST_BRW_CHECK_FULL; - } else if (strcasecmp(tok, "simple") == 0) { - bulk->blk_flags = LST_BRW_CHECK_SIMPLE; - } else { - fprintf(stderr, "Unknow flag %s\n", tok); - return -1; - } - - } else if (strcasestr(argv[i], "size=") == argv[i] || - strcasestr(argv[i], "s=") == argv[i]) { - tok = strchr(argv[i], '=') + 1; - - bulk->blk_size = strtol(tok, &end, 0); - if (bulk->blk_size <= 0) { - fprintf(stderr, "Invalid size %s\n", tok); - return -1; - } - - if (end == NULL) - return 0; - - if (*end == 'k' || *end == 'K') - bulk->blk_size *= 1024; - else if (*end == 'm' || *end == 'M') - bulk->blk_size *= 1024 * 1024; - - if (bulk->blk_size > CFS_PAGE_SIZE * LNET_MAX_IOV) { - fprintf(stderr, "Size exceed limitation: %d bytes\n", - bulk->blk_size); - return -1; - } - - } else if (strcasecmp(argv[i], "read") == 0 || - strcasecmp(argv[i], "r") == 0) { - bulk->blk_opc = LST_BRW_READ; - - } else if (strcasecmp(argv[i], "write") == 0 || - strcasecmp(argv[i], "w") == 0) { - bulk->blk_opc = LST_BRW_WRITE; - - } else { - fprintf(stderr, "Unknow parameter: %s\n", argv[i]); - return -1; - } - - i++; - } - - return rc; -} - -int -lst_get_test_param(char *test, int argc, char **argv, void **param, int *plen) -{ - lst_test_bulk_param_t *bulk = NULL; - int type; - - type = lst_test_name2type(test); - if (type < 0) { - fprintf(stderr, "Unknow test name %s\n", test); - return -1; - } - - switch (type) { - case LST_TEST_PING: - break; - - case LST_TEST_BULK: - bulk = malloc(sizeof(*bulk)); - if (bulk == NULL) { - fprintf(stderr, "Out of memory\n"); - return -1; - } - - memset(bulk, 0, sizeof(*bulk)); - - if (lst_get_bulk_param(argc, argv, bulk) != 0) { - free(bulk); - return -1; - } - 
- *param = bulk; - *plen = sizeof(*bulk); - - break; - - default: - break; - } - - /* TODO: parse more parameter */ - return type; -} - -int -lst_add_test_ioctl(char *batch, int type, int loop, int concur, - int dist, int span, char *sgrp, char *dgrp, - void *param, int plen, int *retp, struct list_head *resultp) -{ - lstio_test_args_t args = { - .lstio_tes_key = session_key, - .lstio_tes_bat_nmlen = strlen(batch), - .lstio_tes_bat_name = batch, - .lstio_tes_type = type, - .lstio_tes_loop = loop, - .lstio_tes_concur = concur, - .lstio_tes_dist = dist, - .lstio_tes_span = span, - .lstio_tes_sgrp_nmlen = strlen(sgrp), - .lstio_tes_sgrp_name = sgrp, - .lstio_tes_dgrp_nmlen = strlen(dgrp), - .lstio_tes_dgrp_name = dgrp, - .lstio_tes_param_len = plen, - .lstio_tes_param = param, - .lstio_tes_retp = retp, - .lstio_tes_resultp = resultp, - }; - - return lst_ioctl(LSTIO_TEST_ADD, &args, sizeof(args)); -} - -int -jt_lst_add_test(int argc, char **argv) -{ - struct list_head head; - char *batch = NULL; - char *test = NULL; - char *dstr = NULL; - char *from = NULL; - char *to = NULL; - void *param = NULL; - int optidx = 0; - int concur = 1; - int loop = -1; - int dist = 1; - int span = 1; - int plen = 0; - int fcount = 0; - int tcount = 0; - int ret = 0; - int type; - int rc; - int c; - - static struct option add_test_opts[] = - { - {"batch", required_argument, 0, 'b' }, - {"concurrency", required_argument, 0, 'c' }, - {"distribute", required_argument, 0, 'd' }, - {"from", required_argument, 0, 'f' }, - {"to", required_argument, 0, 't' }, - {"loop", required_argument, 0, 'l' }, - {0, 0, 0, 0 } - }; - - if (session_key == 0) { - fprintf(stderr, - "Can't find env LST_SESSION or value is not valid\n"); - return -1; - } - - while (1) { - c = getopt_long(argc, argv, "b:c:d:f:l:t:", - add_test_opts, &optidx); - - /* Detect the end of the options. 
*/ - if (c == -1) - break; - - switch (c) { - case 'b': - batch = optarg; - break; - case 'c': - concur = atoi(optarg); - break; - case 'd': - dstr = optarg; - break; - case 'f': - from = optarg; - break; - case 'l': - loop = atoi(optarg); - break; - case 't': - to = optarg; - break; - default: - lst_print_usage(argv[0]); - return -1; - } - } - - if (optind == argc || from == NULL || to == NULL) { - lst_print_usage(argv[0]); - return -1; - } - - if (concur <= 0 || concur > LST_MAX_CONCUR) { - fprintf(stderr, "Invalid concurrency of test: %d\n", concur); - return -1; - } - - if (batch == NULL) - batch = LST_DEFAULT_BATCH; - - if (dstr != NULL) { - rc = lst_parse_distribute(dstr, &dist, &span); - if (rc != 0) { - fprintf(stderr, "Invalid distribution: %s\n", dstr); - return -1; - } - } - - test = argv[optind++]; - - argc -= optind; - argv += optind; - - type = lst_get_test_param(test, argc, argv, ¶m, &plen); - if (type < 0) { - fprintf(stderr, "Failed to add test (%s)\n", test); - return -1; - } - - CFS_INIT_LIST_HEAD(&head); - - rc = lst_get_node_count(LST_OPC_GROUP, from, &fcount, NULL); - if (rc != 0) { - fprintf(stderr, "Can't get count of nodes from %s: %s\n", - from, strerror(errno)); - goto out; - } - - rc = lst_get_node_count(LST_OPC_GROUP, to, &tcount, NULL); - if (rc != 0) { - fprintf(stderr, "Can't get count of nodes from %s: %s\n", - to, strerror(errno)); - goto out; - } - - rc = lst_alloc_rpcent(&head, fcount > tcount ? 
fcount : tcount, 0); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - goto out; - } - - rc = lst_add_test_ioctl(batch, type, loop, concur, - dist, span, from, to, param, plen, &ret, &head); - - if (rc == 0) { - fprintf(stdout, "Test was added successfully\n"); - if (ret != 0) { - fprintf(stdout, "Server group contains userland test " - "nodes, old version of tcplnd can't accept " - "connection request\n"); - } - - goto out; - } - - if (rc == -1) { - lst_print_error("test", "Failed to add test: %s\n", - strerror(errno)); - goto out; - } - - lst_print_transerr(&head, "add test"); -out: - lst_free_rpcent(&head); - - if (param != NULL) - free(param); - - return rc; -} - -static command_t lst_cmdlist[] = { - {"new_session", jt_lst_new_session, NULL, - "Usage: lst new_session [--timeout TIME] [--force] [NAME]" }, - {"end_session", jt_lst_end_session, NULL, - "Usage: lst end_session" }, - {"show_session", jt_lst_show_session, NULL, - "Usage: lst show_session" }, - {"ping", jt_lst_ping , NULL, - "Usage: lst ping [--group NAME] [--batch NAME] [--session] [--nodes IDS]" }, - {"add_group", jt_lst_add_group, NULL, - "Usage: lst group NAME IDs [IDs]..." }, - {"del_group", jt_lst_del_group, NULL, - "Usage: lst del_group NAME" }, - {"update_group", jt_lst_update_group, NULL, - "Usage: lst update_group NAME [--clean] [--refresh] [--remove IDs]" }, - {"list_group", jt_lst_list_group, NULL, - "Usage: lst list_group [--active] [--busy] [--down] [--unknown] GROUP ..." }, - {"stat", jt_lst_stat, NULL, - "Usage: lst stat [--bw] [--rate] [--read] [--write] [--max] [--min] [--avg] " - " [--timeout #] [--delay #] GROUP [GROUP]" }, - {"show_error", jt_lst_show_error, NULL, - "Usage: lst show_error NAME | IDS ..." 
}, - {"add_batch", jt_lst_add_batch, NULL, - "Usage: lst add_batch NAME" }, - {"run", jt_lst_start_batch, NULL, - "Usage: lst run [--timeout TIME] [NAME]" }, - {"stop", jt_lst_stop_batch, NULL, - "Usage: lst stop [--force] BATCH_NAME" }, - {"list_batch", jt_lst_list_batch, NULL, - "Usage: lst list_batch NAME [--test ID] [--server]" }, - {"query", jt_lst_query_batch, NULL, - "Usage: lst query [--test ID] [--server] [--timeout TIME] NAME" }, - {"add_test", jt_lst_add_test, NULL, - "Usage: lst add_test [--batch BATCH] [--loop #] [--concurrency #] " - " [--distribute #:#] [--from GROUP] [--to GROUP] TEST..." }, - {"help", Parser_help, 0, "help" }, - {0, 0, 0, NULL } -}; - -int -lst_initialize(void) -{ - char *key; - - key = getenv("LST_SESSION"); - - if (key == NULL) { - session_key = 0; - return 0; - } - - session_key = atoi(key); - - return 0; -} - -int -main(int argc, char **argv) -{ - setlinebuf(stdout); - - if (lst_initialize() < 0) - exit(0); - - if (ptl_initialize(argc, argv) < 0) - exit(0); - - Parser_init("lst > ", lst_cmdlist); - - if (argc != 1) - return Parser_execarg(argc - 1, argv + 1, lst_cmdlist); - - Parser_commands(); - - return 0; -} diff --git a/lnet/utils/lstclient.c b/lnet/utils/lstclient.c deleted file mode 100644 index c9a70f86a761249744dc1cffe4323ddfcf885178..0000000000000000000000000000000000000000 --- a/lnet/utils/lstclient.c +++ /dev/null @@ -1,217 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Liang Zhen <liangzhen@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org - */ -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <errno.h> -#include <pwd.h> -#include <lnet/lnetctl.h> -#include <lnet/lnetst.h> -#include "../selftest/rpc.h" -#include "../selftest/selftest.h" - -static int lstjn_stopping = 0; -static int lstjn_intialized = 0; - -unsigned int libcfs_subsystem_debug = ~0 - (S_LNET | S_LND); -unsigned int 
libcfs_debug = 0; - -static struct option lstjn_options[] = -{ - {"sesid", required_argument, 0, 's' }, - {"group", required_argument, 0, 'g' }, - {"server_mode", no_argument, 0, 'm' }, - {0, 0, 0, 0 } -}; - -void -lstjn_stop (int sig) -{ - lstjn_stopping = 1; -} - -void -lstjn_rpc_done(srpc_client_rpc_t *rpc) -{ - if (!lstjn_intialized) - lstjn_intialized = 1; -} - -int -lstjn_join_session(char *ses, char *grp) -{ - lnet_process_id_t sesid; - srpc_client_rpc_t *rpc; - srpc_join_reqst_t *req; - srpc_join_reply_t *rep; - srpc_mksn_reqst_t *sreq; - srpc_mksn_reply_t *srep; - int rc; - - sesid.pid = LUSTRE_LNET_PID; - sesid.nid = libcfs_str2nid(ses); - if (sesid.nid == LNET_NID_ANY) { - fprintf(stderr, "Invalid session NID: %s\n", ses); - return -1; - } - - rpc = sfw_create_rpc(sesid, SRPC_SERVICE_JOIN, 0, - 0, lstjn_rpc_done, NULL); - if (rpc == NULL) { - fprintf(stderr, "Out of memory\n"); - return -1; - } - - req = &rpc->crpc_reqstmsg.msg_body.join_reqst; - - req->join_sid = LST_INVALID_SID; - strncpy(req->join_group, grp, LST_NAME_SIZE); - - sfw_post_rpc(rpc); - - for (;;) { - rc = selftest_wait_events(); - - if (lstjn_intialized) - break; - } - - if (rpc->crpc_status != 0) { - fprintf(stderr, "Failed to send RPC to console: %s\n", - strerror(rpc->crpc_status)); - srpc_client_rpc_decref(rpc); - return -1; - } - - sfw_unpack_message(&rpc->crpc_replymsg); - - rep = &rpc->crpc_replymsg.msg_body.join_reply; - if (rep->join_status != 0) { - fprintf(stderr, "Can't join session %s group %s: %s\n", - ses, grp, strerror(rep->join_status)); - srpc_client_rpc_decref(rpc); - return -1; - } - - sreq = &rpc->crpc_reqstmsg.msg_body.mksn_reqst; - sreq->mksn_sid = rep->join_sid; - sreq->mksn_force = 0; - strcpy(sreq->mksn_name, rep->join_session); - - srep = &rpc->crpc_replymsg.msg_body.mksn_reply; - - rc = sfw_make_session(sreq, srep); - if (rc != 0 || srep->mksn_status != 0) { - fprintf(stderr, "Can't create session: %d, %s\n", - rc, strerror(srep->mksn_status)); - 
srpc_client_rpc_decref(rpc); - return -1; - } - - fprintf(stdout, "Session %s, ID: %s, %Lu\n", - ses, libcfs_nid2str(rep->join_sid.ses_nid), - rep->join_sid.ses_stamp); - - srpc_client_rpc_decref(rpc); - - return 0; -} - -int -main(int argc, char **argv) -{ - char *ses = NULL; - char *grp = NULL; - int server_mode_flag = 0; - int optidx; - int c; - int rc; - - const char *usage_string = - "Usage: lstclient --sesid ID --group GROUP [--server_mode]\n"; - - while (1) { - c = getopt_long(argc, argv, "s:g:m", - lstjn_options, &optidx); - - if (c == -1) - break; - - switch (c) { - case 's': - ses = optarg; - break; - case 'g': - grp = optarg; - break; - case 'm': - server_mode_flag = 1; - break; - default: - fprintf(stderr, usage_string); - return -1; - } - } - - if (optind != argc || grp == NULL || ses == NULL) { - fprintf(stderr, usage_string); - return -1; - } - - rc = libcfs_debug_init(5 * 1024 * 1024); - if (rc != 0) { - CERROR("libcfs_debug_init() failed: %d\n", rc); - return -1; - } - - rc = LNetInit(); - if (rc != 0) { - CERROR("LNetInit() failed: %d\n", rc); - libcfs_debug_cleanup(); - return -1; - } - - if (server_mode_flag) - lnet_server_mode(); - - rc = lnet_selftest_init(); - if (rc != 0) { - fprintf(stderr, "Can't startup selftest\n"); - LNetFini(); - libcfs_debug_cleanup(); - - return -1; - } - - rc = lstjn_join_session(ses, grp); - if (rc != 0) - goto out; - - signal(SIGINT, lstjn_stop); - - fprintf(stdout, "Start handling selftest requests, Ctl-C to stop\n"); - - while (!lstjn_stopping) { - selftest_wait_events(); - - if (!sfw_session_removed()) - continue; - - fprintf(stdout, "Session ended\n"); - break; - } - -out: - lnet_selftest_fini(); - - LNetFini(); - - libcfs_debug_cleanup(); - - return rc; -} diff --git a/lnet/utils/parser.c b/lnet/utils/parser.c deleted file mode 100644 index ee9604ed09d5fd3144823b7b641b894976fc10a8..0000000000000000000000000000000000000000 --- a/lnet/utils/parser.c +++ /dev/null @@ -1,642 +0,0 @@ -/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> -#include <string.h> -#include <stddef.h> -#include <unistd.h> -#include <sys/param.h> -#include <assert.h> -#include <lnet/api-support.h> - -#include "parser.h" - -static command_t * top_level; /* Top level of commands, initialized by - * InitParser */ -static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */ -static int done; /* Set to 1 if user types exit or quit */ - - -/* static functions */ -static char *skipwhitespace(char *s); -static char *skiptowhitespace(char *s); -static command_t *find_cmd(char *name, command_t cmds[], char **next); -static int process(char *s, char **next, command_t *lookup, command_t **result, - char **prev); -static void print_commands(char *str, command_t *table); - -static char * skipwhitespace(char * s) -{ - char * t; - int len; - - len = (int)strlen(s); - for (t = s; t <= s + len && isspace(*t); t++); - return(t); -} - - -static char * skiptowhitespace(char * s) -{ - char * t; - - for (t = s; *t && !isspace(*t); t++); - return(t); -} - -static int line2args(char *line, char **argv, int maxargs) -{ - char *arg; - 
int i = 0; - - arg = strtok(line, " \t"); - if ( arg ) { - argv[i] = arg; - i++; - } else - return 0; - - while( (arg = strtok(NULL, " \t")) && (i <= maxargs)) { - argv[i] = arg; - i++; - } - return i; -} - -/* find a command -- return it if unique otherwise print alternatives */ -static command_t *Parser_findargcmd(char *name, command_t cmds[]) -{ - command_t *cmd; - - for (cmd = cmds; cmd->pc_name; cmd++) { - if (strcmp(name, cmd->pc_name) == 0) - return cmd; - } - return NULL; -} - -int Parser_execarg(int argc, char **argv, command_t cmds[]) -{ - command_t *cmd; - - cmd = Parser_findargcmd(argv[0], cmds); - if ( cmd ) { - int rc = (cmd->pc_func)(argc, argv); - if (rc == CMD_HELP) - fprintf(stderr, "%s\n", cmd->pc_help); - return rc; - } else { - printf("Try interactive use without arguments or use one of:\n"); - for (cmd = cmds; cmd->pc_name; cmd++) - printf("\"%s\" ", cmd->pc_name); - printf("\nas argument.\n"); - } - return -1; -} - -/* returns the command_t * (NULL if not found) corresponding to a - _partial_ match with the first token in name. It sets *next to - point to the following token. Does not modify *name. */ -static command_t * find_cmd(char * name, command_t cmds[], char ** next) -{ - int i, len; - - if (!cmds || !name ) - return NULL; - - /* This sets name to point to the first non-white space character, - and next to the first whitespace after name, len to the length: do - this with strtok*/ - name = skipwhitespace(name); - *next = skiptowhitespace(name); - len = *next - name; - if (len == 0) - return NULL; - - for (i = 0; cmds[i].pc_name; i++) { - if (strncasecmp(name, cmds[i].pc_name, len) == 0) { - *next = skipwhitespace(*next); - return(&cmds[i]); - } - } - return NULL; -} - -/* Recursively process a command line string s and find the command - corresponding to it. This can be ambiguous, full, incomplete, - non-existent. 
*/ -static int process(char *s, char ** next, command_t *lookup, - command_t **result, char **prev) -{ - *result = find_cmd(s, lookup, next); - *prev = s; - - /* non existent */ - if ( ! *result ) - return CMD_NONE; - - /* found entry: is it ambigous, i.e. not exact command name and - more than one command in the list matches. Note that find_cmd - points to the first ambiguous entry */ - if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) && - find_cmd(s, (*result) + 1, next)) - return CMD_AMBIG; - - /* found a unique command: component or full? */ - if ( (*result)->pc_func ) { - return CMD_COMPLETE; - } else { - if ( *next == '\0' ) { - return CMD_INCOMPLETE; - } else { - return process(*next, next, (*result)->pc_sub_cmd, result, prev); - } - } -} - -#ifdef HAVE_LIBREADLINE -static command_t * match_tbl; /* Command completion against this table */ -static char * command_generator(const char * text, int state) -{ - static int index, - len; - char *name; - - /* Do we have a match table? 
*/ - if (!match_tbl) - return NULL; - - /* If this is the first time called on this word, state is 0 */ - if (!state) { - index = 0; - len = (int)strlen(text); - } - - /* Return next name in the command list that paritally matches test */ - while ( (name = (match_tbl + index)->pc_name) ) { - index++; - - if (strncasecmp(name, text, len) == 0) { - return(strdup(name)); - } - } - - /* No more matches */ - return NULL; -} - -/* probably called by readline */ -static char **command_completion(char * text, int start, int end) -{ - command_t * table; - char * pos; - - match_tbl = top_level; - - for (table = find_cmd(rl_line_buffer, match_tbl, &pos); - table; table = find_cmd(pos, match_tbl, &pos)) - { - - if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd; - } - - return completion_matches(text, command_generator); -} -#endif - -/* take a string and execute the function or print help */ -int execute_line(char * line) -{ - command_t *cmd, *ambig; - char *prev; - char *next, *tmp; - char *argv[MAXARGS]; - int i; - int rc = 0; - - switch( process(line, &next, top_level, &cmd, &prev) ) { - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, cmd, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - cmd = ambig + 1; - } - fprintf(stderr, "\n"); - break; - case CMD_NONE: - fprintf(stderr, "No such command, type help\n"); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_COMPLETE: - i = line2args(line, argv, MAXARGS); - rc = (cmd->pc_func)(i, argv); - - if (rc == CMD_HELP) - fprintf(stderr, "%s\n", cmd->pc_help); - - break; - } - - return rc; -} - -int -noop_fn () -{ - return (0); -} - -/* just in case you're ever in an airplane and discover you - forgot to install readline-dev. :) */ -int init_input() -{ - int interactive = isatty (fileno (stdin)); - -#ifdef HAVE_LIBREADLINE - using_history(); - stifle_history(HISTORY); - - if (!interactive) - { - rl_prep_term_function = (rl_vintfunc_t *)noop_fn; - rl_deprep_term_function = (rl_voidfunc_t *)noop_fn; - } - - rl_attempted_completion_function = (CPPFunction *)command_completion; - rl_completion_entry_function = (void *)command_generator; -#endif - return interactive; -} - -#ifndef HAVE_LIBREADLINE -#define add_history(s) -char * readline(char * prompt) -{ - char line[2048]; - int n = 0; - if (prompt) - printf ("%s", prompt); - if (fgets(line, sizeof(line), stdin) == NULL) - return (NULL); - n = strlen(line); - if (n && line[n-1] == '\n') - line[n-1] = '\0'; - return strdup(line); -} -#endif - -/* this is the command execution machine */ -int Parser_commands(void) -{ - char *line, *s; - int rc = 0; - int interactive; - - interactive = init_input(); - - while(!done) { - line = readline(interactive ? 
parser_prompt : NULL); - - if (!line) break; - - s = skipwhitespace(line); - - if (*s) { - add_history(s); - rc = execute_line(s); - - /* reset optind to 0 to tell getopt - * to reinitialize itself */ - optind = 0; - } - - free(line); - } - return rc; -} - - -/* sets the parser prompt */ -void Parser_init(char * prompt, command_t * cmds) -{ - done = 0; - top_level = cmds; - if (parser_prompt) free(parser_prompt); - parser_prompt = strdup(prompt); -} - -/* frees the parser prompt */ -void Parser_exit(int argc, char *argv[]) -{ - done = 1; - free(parser_prompt); - parser_prompt = NULL; -} - -/* convert a string to an integer */ -int Parser_int(char *s, int *val) -{ - int ret; - - if (*s != '0') - ret = sscanf(s, "%d", val); - else if (*(s+1) != 'x') - ret = sscanf(s, "%o", val); - else { - s++; - ret = sscanf(++s, "%x", val); - } - - return(ret); -} - - -void Parser_qhelp(int argc, char *argv[]) { - - printf("Available commands are:\n"); - - print_commands(NULL, top_level); - printf("For more help type: help command-name\n"); -} - -int Parser_help(int argc, char **argv) -{ - char line[1024]; - char *next, *prev, *tmp; - command_t *result, *ambig; - int i; - - if ( argc == 1 ) { - Parser_qhelp(argc, argv); - return 0; - } - - line[0]='\0'; - for ( i = 1 ; i < argc ; i++ ) { - strcat(line, argv[i]); - } - - switch ( process(line, &next, top_level, &result, &prev) ) { - case CMD_COMPLETE: - fprintf(stderr, "%s: %s\n",line, result->pc_help); - break; - case CMD_NONE: - fprintf(stderr, "%s: Unknown command.\n", line); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; result->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, result, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - result = ambig + 1; - } - fprintf(stderr, "\n"); - break; - } - return 0; -} - - -void Parser_printhelp(char *cmd) -{ - char *argv[] = { "help", cmd }; - Parser_help(2, argv); -} - -/************************************************************************* - * COMMANDS * - *************************************************************************/ - - -static void print_commands(char * str, command_t * table) { - command_t * cmds; - char buf[80]; - - for (cmds = table; cmds->pc_name; cmds++) { - if (cmds->pc_func) { - if (str) printf("\t%s %s\n", str, cmds->pc_name); - else printf("\t%s\n", cmds->pc_name); - } - if (cmds->pc_sub_cmd) { - if (str) { - sprintf(buf, "%s %s", str, cmds->pc_name); - print_commands(buf, cmds->pc_sub_cmd); - } else { - print_commands(cmds->pc_name, cmds->pc_sub_cmd); - } - } - } -} - -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len) -{ - char *line = NULL; - int size = strlen(prompt) + strlen(deft) + 8; - char *theprompt; - theprompt = malloc(size); - assert(theprompt); - - sprintf(theprompt, "%s [%s]: ", prompt, deft); - - line = readline(theprompt); - free(theprompt); - - if ( line == NULL || *line == '\0' ) { - strncpy(res, deft, len); - } else { - strncpy(res, line, len); - } - - if ( line ) { - free(line); - return res; - } else { - return NULL; - } -} - -/* get integer from prompt, loop forever to get it */ -int Parser_getint(const char *prompt, long min, long max, long deft, int base) -{ - int rc; - long result; - char *line; - int size = strlen(prompt) + 40; - char *theprompt = malloc(size); - 
assert(theprompt); - sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft); - - fflush(stdout); - - do { - line = NULL; - line = readline(theprompt); - if ( !line ) { - fprintf(stdout, "Please enter an integer.\n"); - fflush(stdout); - continue; - } - if ( *line == '\0' ) { - free(line); - result = deft; - break; - } - rc = Parser_arg2int(line, &result, base); - free(line); - if ( rc != 0 ) { - fprintf(stdout, "Invalid string.\n"); - fflush(stdout); - } else if ( result > max || result < min ) { - fprintf(stdout, "Error: response must lie between %ld and %ld.\n", - min, max); - fflush(stdout); - } else { - break; - } - } while ( 1 ) ; - - if (theprompt) - free(theprompt); - return result; - -} - -/* get boolean (starting with YyNn; loop forever */ -int Parser_getbool(const char *prompt, int deft) -{ - int result = 0; - char *line; - int size = strlen(prompt) + 8; - char *theprompt = malloc(size); - assert(theprompt); - - fflush(stdout); - - if ( deft != 0 && deft != 1 ) { - fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n", - deft); - assert ( 0 ); - } - sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y"); - - do { - line = NULL; - line = readline(theprompt); - if ( line == NULL ) { - result = deft; - break; - } - if ( *line == '\0' ) { - result = deft; - break; - } - if ( *line == 'y' || *line == 'Y' ) { - result = 1; - break; - } - if ( *line == 'n' || *line == 'N' ) { - result = 0; - break; - } - if ( line ) - free(line); - fprintf(stdout, "Invalid string. 
Must start with yY or nN\n"); - fflush(stdout); - } while ( 1 ); - - if ( line ) - free(line); - if ( theprompt ) - free(theprompt); - return result; -} - -/* parse int out of a string or prompt for it */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base) -{ - long result; - int rc; - - rc = Parser_arg2int(inp, &result, base); - - if ( rc == 0 ) { - return result; - } else { - return Parser_getint(prompt, deft, min, max, base); - } -} - -/* parse int out of a string or prompt for it */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len) -{ - if ( inp == NULL || *inp == '\0' ) { - return Parser_getstr(prompt, deft, answer, len); - } else - return inp; -} - -/* change a string into a number: return 0 on success. No invalid characters - allowed. The processing of base and validity follows strtol(3)*/ -int Parser_arg2int(const char *inp, long *result, int base) -{ - char *endptr; - - if ( (base !=0) && (base < 2 || base > 36) ) - return 1; - - *result = strtol(inp, &endptr, base); - - if ( *inp != '\0' && *endptr == '\0' ) - return 0; - else - return 1; -} - -int Parser_quit(int argc, char **argv) -{ - argc = argc; - argv = argv; - done = 1; - return 0; -} diff --git a/lnet/utils/parser.h b/lnet/utils/parser.h deleted file mode 100644 index 9e7e95a3b4073503ddeb443b6e7d231cb20ad189..0000000000000000000000000000000000000000 --- a/lnet/utils/parser.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#define HISTORY 100 /* Don't let history grow unbounded */ -#define MAXARGS 512 - -#define CMD_COMPLETE 0 -#define CMD_INCOMPLETE 1 -#define CMD_NONE 2 -#define CMD_AMBIG 3 -#define CMD_HELP 4 - -typedef struct parser_cmd { - char *pc_name; - int (* pc_func)(int, char **); - struct parser_cmd * pc_sub_cmd; - char *pc_help; -} command_t; - -typedef struct argcmd { - char *ac_name; - int (*ac_func)(int, char **); - char *ac_help; -} argcmd_t; - -typedef struct network 
{ - char *type; - char *server; - int port; -} network_t; - -int Parser_quit(int argc, char **argv); -void Parser_init(char *, command_t *); /* Set prompt and load command list */ -int Parser_commands(void); /* Start the command parser */ -void Parser_qhelp(int, char **); /* Quick help routine */ -int Parser_help(int, char **); /* Detailed help routine */ -void Parser_printhelp(char *); /* Detailed help routine */ -void Parser_exit(int, char **); /* Shuts down command parser */ -int Parser_execarg(int argc, char **argv, command_t cmds[]); -int execute_line(char * line); - -/* Converts a string to an integer */ -int Parser_int(char *, int *); - -/* Prompts for a string, with default values and a maximum length */ -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len); - -/* Prompts for an integer, with minimum, maximum and default values and base */ -int Parser_getint(const char *prompt, long min, long max, long deft, - int base); - -/* Prompts for a yes/no, with default */ -int Parser_getbool(const char *prompt, int deft); - -/* Extracts an integer from a string, or prompts if it cannot get one */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base); - -/* Extracts a word from the input, or propmts if it cannot get one */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len); - -/* Extracts an integer from a string with a base */ -int Parser_arg2int(const char *inp, long *result, int base); - -#endif diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c deleted file mode 100644 index 96d5a5e307705522eea7ba59e708bdaade05de40..0000000000000000000000000000000000000000 --- a/lnet/utils/portals.c +++ /dev/null @@ -1,1710 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdio.h> -#include <sys/types.h> -#ifdef HAVE_NETDB_H -#include <netdb.h> -#endif -#include <sys/socket.h> -#ifdef HAVE_NETINET_TCP_H -#include <netinet/tcp.h> -#endif -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#ifdef HAVE_SYS_IOCTL_H -#include <sys/ioctl.h> -#endif -#ifndef _IOWR -#include "ioctl.h" -#endif -#include <errno.h> -#include <unistd.h> -#include <time.h> -#include <stdarg.h> -#ifdef HAVE_ENDIAN_H -#include <endian.h> -#endif - -#include <libcfs/portals_utils.h> -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include <lnet/socklnd.h> -#include "parser.h" - -unsigned int libcfs_debug; -unsigned int libcfs_printk = D_CANTMASK; - -static int g_net_set; -static __u32 g_net; - -/* Convert a string boolean to an int; "enable" -> 1 */ -int -lnet_parse_bool (int *b, char *str) -{ - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - !strcasecmp (str, "off") || - !strcasecmp (str, "down") || - !strcasecmp (str, "disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp (str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "up") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); -} - -int -lnet_parse_port (int *port, char *str) -{ - 
char *end; - - *port = strtol (str, &end, 0); - - if (*end == 0 && /* parsed whole string */ - *port > 0 && *port < 65536) /* minimal sanity check */ - return (0); - - return (-1); -} - -#ifdef HAVE_GETHOSTBYNAME -static struct hostent * -ptl_gethostbyname(char * hname) { - struct hostent *he; - he = gethostbyname(hname); - if (!he) { - switch(h_errno) { - case HOST_NOT_FOUND: - case NO_ADDRESS: - fprintf(stderr, "Unable to resolve hostname: %s\n", - hname); - break; - default: - fprintf(stderr, "gethostbyname error for %s: %s\n", - hname, strerror(h_errno)); - break; - } - return NULL; - } - return he; -} -#endif - -int -lnet_parse_ipquad (__u32 *ipaddrp, char *str) -{ - int a; - int b; - int c; - int d; - - if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 && - (a & ~0xff) == 0 && (b & ~0xff) == 0 && - (c & ~0xff) == 0 && (d & ~0xff) == 0) - { - *ipaddrp = (a<<24)|(b<<16)|(c<<8)|d; - return (0); - } - - return (-1); -} - -int -lnet_parse_ipaddr (__u32 *ipaddrp, char *str) -{ -#ifdef HAVE_GETHOSTBYNAME - struct hostent *he; -#endif - - if (!strcmp (str, "_all_")) { - *ipaddrp = 0; - return (0); - } - - if (lnet_parse_ipquad(ipaddrp, str) == 0) - return (0); - -#ifdef HAVE_GETHOSTBYNAME - if ((('a' <= str[0] && str[0] <= 'z') || - ('A' <= str[0] && str[0] <= 'Z')) && - (he = ptl_gethostbyname (str)) != NULL) { - __u32 addr = *(__u32 *)he->h_addr; - - *ipaddrp = ntohl(addr); /* HOST byte order */ - return (0); - } -#endif - - return (-1); -} - -char * -ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup) -{ -#ifdef HAVE_GETHOSTBYNAME - __u32 net_ip; - struct hostent *he; - - if (lookup) { - net_ip = htonl (ipaddr); - he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET); - if (he != NULL) { - strcpy(str, he->h_name); - return (str); - } - } -#endif - - sprintf (str, "%d.%d.%d.%d", - (ipaddr >> 24) & 0xff, (ipaddr >> 16) & 0xff, - (ipaddr >> 8) & 0xff, ipaddr & 0xff); - return (str); -} - -int -lnet_parse_time (time_t *t, char *str) -{ - char *end; - int n; - 
struct tm tm; - - *t = strtol (str, &end, 0); - if (*end == 0) /* parsed whole string */ - return (0); - - memset (&tm, 0, sizeof (tm)); - n = sscanf (str, "%d-%d-%d-%d:%d:%d", - &tm.tm_year, &tm.tm_mon, &tm.tm_mday, - &tm.tm_hour, &tm.tm_min, &tm.tm_sec); - if (n != 6) - return (-1); - - tm.tm_mon--; /* convert to 0 == Jan */ - tm.tm_year -= 1900; /* y2k quirk */ - tm.tm_isdst = -1; /* dunno if it's daylight savings... */ - - *t = mktime (&tm); - if (*t == (time_t)-1) - return (-1); - - return (0); -} - -int g_net_is_set (char *cmd) -{ - if (g_net_set) - return 1; - - if (cmd != NULL) - fprintf(stderr, - "You must run the 'network' command before '%s'.\n", - cmd); - return 0; -} - -int g_net_is_compatible (char *cmd, ...) -{ - va_list ap; - int nal; - - if (!g_net_is_set(cmd)) - return 0; - - va_start(ap, cmd); - - do { - nal = va_arg (ap, int); - if (nal == LNET_NETTYP(g_net)) { - va_end (ap); - return 1; - } - } while (nal != 0); - - va_end (ap); - - if (cmd != NULL) - fprintf (stderr, - "Command %s not compatible with %s NAL\n", - cmd, - libcfs_lnd2str(LNET_NETTYP(g_net))); - return 0; -} - -int ptl_initialize(int argc, char **argv) -{ - register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH, - LNET_DEV_MAJOR, LNET_DEV_MINOR); - return 0; -} - - -int jt_ptl_network(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - __u32 net = LNET_NIDNET(LNET_NID_ANY); - int rc; - - if (argc < 2) { - fprintf(stderr, "usage: %s <net>|up|down\n", argv[0]); - return 0; - } - - if (!strcmp(argv[1], "unconfigure") || - !strcmp(argv[1], "down")) { - LIBCFS_IOC_INIT(data); - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_UNCONFIGURE, &data); - - if (rc == 0) { - printf ("LNET ready to unload\n"); - return 0; - } - - if (errno == EBUSY) - fprintf(stderr, "LNET busy\n"); - else - fprintf(stderr, "LNET unconfigure error %d: %s\n", - errno, strerror(errno)); - return -1; - } - - if (!strcmp(argv[1], "configure") || - !strcmp(argv[1], "up")) { - LIBCFS_IOC_INIT(data); - rc = l_ioctl(LNET_DEV_ID, 
IOC_LIBCFS_CONFIGURE, &data); - - if (rc == 0) { - printf ("LNET configured\n"); - return 0; - } - - fprintf(stderr, "LNET configure error %d: %s\n", - errno, strerror(errno)); - return -1; - } - - net = libcfs_str2net(argv[1]); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - fprintf(stderr, "Can't parse net %s\n", argv[1]); - return -1; - } - - g_net_set = 1; - g_net = net; - return 0; -} - -int -jt_ptl_list_nids(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int all = 0, return_nid = 0; - int count; - int rc; - - all = (argc == 2) && (strcmp(argv[1], "all") == 0); - /* Hack to pass back value */ - return_nid = (argc == 2) && (argv[1][0] == 1); - - if ((argc > 2) && !(all || return_nid)) { - fprintf(stderr, "usage: %s [all]\n", argv[0]); - return 0; - } - - for (count = 0;; count++) { - LIBCFS_IOC_INIT (data); - data.ioc_count = count; - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NI, &data); - - if (rc < 0) { - if ((count > 0) && (errno == ENOENT)) - /* We found them all */ - break; - fprintf(stderr,"IOC_LIBCFS_GET_NI error %d: %s\n", - errno, strerror(errno)); - return -1; - } - - if (all || (LNET_NETTYP(LNET_NIDNET(data.ioc_nid)) != LOLND)) { - printf("%s\n", libcfs_nid2str(data.ioc_nid)); - if (return_nid) { - *(__u64 *)(argv[1]) = data.ioc_nid; - return_nid--; - } - } - } - - return 0; -} - -int -jt_ptl_which_nid (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int best_dist = 0; - int best_order = 0; - lnet_nid_t best_nid = LNET_NID_ANY; - int dist; - int order; - lnet_nid_t nid; - char *nidstr; - int rc; - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s NID [NID...]\n", argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) { - nidstr = argv[i]; - nid = libcfs_str2nid(nidstr); - if (nid == LNET_NID_ANY) { - fprintf(stderr, "Can't parse NID %s\n", nidstr); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_DIST, &data); - if (rc != 0) { - fprintf(stderr, "Can't get 
distance to %s: %s\n", - nidstr, strerror(errno)); - return -1; - } - - dist = data.ioc_u32[0]; - order = data.ioc_u32[1]; - - if (dist < 0) { - if (dist == -EHOSTUNREACH) - continue; - - fprintf(stderr, "Unexpected distance to %s: %d\n", - nidstr, dist); - return -1; - } - - if (best_nid == LNET_NID_ANY || - dist < best_dist || - (dist == best_dist && order < best_order)) { - best_dist = dist; - best_order = order; - best_nid = nid; - } - } - - if (best_nid == LNET_NID_ANY) { - fprintf(stderr, "No reachable NID\n"); - return -1; - } - - printf("%s\n", libcfs_nid2str(best_nid)); - return 0; -} - -int -jt_ptl_print_interfaces (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - char buffer[3][64]; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, 0)) - return -1; - - for (index = 0;;index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_INTERFACE, &data); - if (rc != 0) - break; - - printf ("%s: (%s/%s) npeer %d nroute %d\n", - ptl_ipaddr_2_str(data.ioc_u32[0], buffer[2], 1), - ptl_ipaddr_2_str(data.ioc_u32[0], buffer[0], 0), - ptl_ipaddr_2_str(data.ioc_u32[1], buffer[1], 0), - data.ioc_u32[2], data.ioc_u32[3]); - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no interfaces>\n"); - } else { - fprintf(stderr, "Error getting interfaces: %s: " - "check dmesg.\n", - strerror(errno)); - } - } - - return 0; -} - -int -jt_ptl_add_interface (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - __u32 ipaddr; - int rc; - __u32 netmask = 0xffffff00; - int i; - int count; - char *end; - - if (argc < 2 || argc > 3) { - fprintf (stderr, "usage: %s ipaddr [netmask]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible(argv[0], SOCKLND, 0)) - return -1; - - if (lnet_parse_ipaddr(&ipaddr, argv[1]) != 0) { - fprintf (stderr, "Can't parse ip: %s\n", argv[1]); - return -1; - } - - if (argc > 2 ) { - count = strtol(argv[2], &end, 0); - if (count > 0 && 
count < 32 && *end == 0) { - netmask = 0; - for (i = count; i > 0; i--) - netmask = netmask|(1<<(32-i)); - } else if (lnet_parse_ipquad(&netmask, argv[2]) != 0) { - fprintf (stderr, "Can't parse netmask: %s\n", argv[2]); - return -1; - } - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_u32[0] = ipaddr; - data.ioc_u32[1] = netmask; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_INTERFACE, &data); - if (rc != 0) { - fprintf (stderr, "failed to add interface: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_del_interface (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - __u32 ipaddr = 0; - - if (argc > 2) { - fprintf (stderr, "usage: %s [ipaddr]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible(argv[0], SOCKLND, 0)) - return -1; - - if (argc == 2 && - lnet_parse_ipaddr(&ipaddr, argv[1]) != 0) { - fprintf (stderr, "Can't parse ip: %s\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_u32[0] = ipaddr; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_INTERFACE, &data); - if (rc != 0) { - fprintf (stderr, "failed to delete interface: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_print_peers (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_process_id_t id; - char buffer[2][64]; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, PTLLND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return -1; - - for (index = 0;;index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_PEER, &data); - if (rc != 0) - break; - - if (g_net_is_compatible(NULL, SOCKLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[4]; - printf ("%-20s [%d]%s->%s:%d #%d\n", - libcfs_id2str(id), - data.ioc_count, /* persistence */ - ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */ - ptl_ipaddr_2_str (data.ioc_u32[0], 
buffer[1], 1), /* peer ip */ - data.ioc_u32[1], /* peer port */ - data.ioc_u32[3]); /* conn_count */ - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[4]; - printf ("%-20s s %d%s [%d] "LPD64".%06d" - " m "LPD64"/"LPD64" q %d/%d c %d/%d\n", - libcfs_id2str(id), - data.ioc_net, /* state */ - data.ioc_flags ? "" : " ~!h", /* sent_hello */ - data.ioc_count, /* refcount */ - data.ioc_u64[0]/1000000, /* incarnation secs */ - (int)(data.ioc_u64[0]%1000000), /* incarnation usecs */ - (((__u64)data.ioc_u32[1])<<32) | - ((__u64)data.ioc_u32[0]), /* next_matchbits */ - (((__u64)data.ioc_u32[3])<<32) | - ((__u64)data.ioc_u32[2]), /* last_matchbits_seen */ - data.ioc_u32[5] >> 16, /* nsendq */ - data.ioc_u32[5] & 0xffff, /* nactiveq */ - data.ioc_u32[6] >> 16, /* credits */ - data.ioc_u32[6] & 0xffff); /* outstanding_credits */ - } else if (g_net_is_compatible(NULL, RALND, OPENIBLND, CIBLND, VIBLND, 0)) { - printf ("%-20s [%d]@%s:%d\n", - libcfs_nid2str(data.ioc_nid), /* peer nid */ - data.ioc_count, /* peer persistence */ - ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */ - data.ioc_u32[1]); /* peer port */ - } else { - printf ("%-20s [%d]\n", - libcfs_nid2str(data.ioc_nid), data.ioc_count); - } - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no peers>\n"); - } else { - fprintf(stderr, "Error getting peer list: %s: " - "check dmesg.\n", - strerror(errno)); - } - } - return 0; -} - -int -jt_ptl_add_peer (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid; - __u32 ip = 0; - int port = 0; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, - OPENIBLND, CIBLND, IIBLND, VIBLND, 0)) - return -1; - - if (g_net_is_compatible(NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0)) { - if (argc != 4) { - fprintf (stderr, "usage(tcp,openib,cib,ra): %s nid ipaddr port\n", - argv[0]); - return 0; - } - } else if (g_net_is_compatible(NULL, VIBLND, 0)) { - if (argc != 3) { - fprintf 
(stderr, "usage(vib): %s nid ipaddr\n", - argv[0]); - return 0; - } - } else if (argc != 2) { - fprintf (stderr, "usage(iib): %s nid\n", argv[0]); - return 0; - } - - nid = libcfs_str2nid(argv[1]); - if (nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse NID: %s\n", argv[1]); - return -1; - } - - if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, VIBLND, RALND, 0) && - lnet_parse_ipaddr (&ip, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]); - return -1; - } - - if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0) && - lnet_parse_port (&port, argv[3]) != 0) { - fprintf (stderr, "Can't parse port: %s\n", argv[3]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - data.ioc_u32[0] = ip; - data.ioc_u32[1] = port; - - rc = l_ioctl (LNET_DEV_ID, IOC_LIBCFS_ADD_PEER, &data); - if (rc != 0) { - fprintf (stderr, "failed to add peer: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_del_peer (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid = LNET_NID_ANY; - lnet_pid_t pid = LNET_PID_ANY; - __u32 ip = 0; - char *end; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, PTLLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return -1; - - if (g_net_is_compatible(NULL, SOCKLND, 0)) { - if (argc > 3) { - fprintf (stderr, "usage: %s [nid] [ipaddr]\n", - argv[0]); - return 0; - } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 3) { - fprintf (stderr, "usage: %s [nid] [pid]\n", - argv[0]); - return 0; - } - } else if (argc > 2) { - fprintf (stderr, "usage: %s [nid]\n", argv[0]); - return 0; - } - - if (argc > 1 && - !libcfs_str2anynid(&nid, argv[1])) { - fprintf (stderr, "Can't parse nid: %s\n", argv[1]); - return -1; - } - - if (g_net_is_compatible(NULL, SOCKLND, 0)) { - if (argc > 2 && - lnet_parse_ipaddr (&ip, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", - argv[2]); - 
return -1; - } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 2) { - pid = strtol(argv[2], &end, 0); - if (end == argv[2] || *end == 0) { - fprintf(stderr, "Can't parse pid %s\n", - argv[2]); - return -1; - } - } - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - data.ioc_u32[0] = ip; - data.ioc_u32[1] = pid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_PEER, &data); - if (rc != 0) { - fprintf (stderr, "failed to remove peer: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_print_connections (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_process_id_t id; - char buffer[2][64]; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return -1; - - for (index = 0; ; index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_CONN, &data); - if (rc != 0) - break; - - if (g_net_is_compatible (NULL, SOCKLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[6]; - printf ("%-20s %s[%d]%s->%s:%d %d/%d %s\n", - libcfs_id2str(id), - (data.ioc_u32[3] == SOCKLND_CONN_ANY) ? "A" : - (data.ioc_u32[3] == SOCKLND_CONN_CONTROL) ? "C" : - (data.ioc_u32[3] == SOCKLND_CONN_BULK_IN) ? "I" : - (data.ioc_u32[3] == SOCKLND_CONN_BULK_OUT) ? "O" : "?", - data.ioc_u32[4], /* scheduler */ - ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* local IP addr */ - ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* remote IP addr */ - data.ioc_u32[1], /* remote port */ - data.ioc_count, /* tx buffer size */ - data.ioc_u32[5], /* rx buffer size */ - data.ioc_flags ? 
"nagle" : "nonagle"); - } else if (g_net_is_compatible (NULL, RALND, 0)) { - printf ("%-20s [%d]\n", - libcfs_nid2str(data.ioc_nid), - data.ioc_u32[0] /* device id */); - } else { - printf ("%s\n", libcfs_nid2str(data.ioc_nid)); - } - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no connections>\n"); - } else { - fprintf(stderr, "Error getting connection list: %s: " - "check dmesg.\n", - strerror(errno)); - } - } - return 0; -} - -int jt_ptl_disconnect(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid = LNET_NID_ANY; - __u32 ipaddr = 0; - int rc; - - if (argc > 3) { - fprintf(stderr, "usage: %s [nid] [ipaddr]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible (NULL, SOCKLND, RALND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return 0; - - if (argc >= 2 && - !libcfs_str2anynid(&nid, argv[1])) { - fprintf (stderr, "Can't parse nid %s\n", argv[1]); - return -1; - } - - if (g_net_is_compatible (NULL, SOCKLND, 0) && - argc >= 3 && - lnet_parse_ipaddr (&ipaddr, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr %s\n", argv[2]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - data.ioc_u32[0] = ipaddr; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CLOSE_CONNECTION, &data); - if (rc != 0) { - fprintf(stderr, "failed to remove connection: %s\n", - strerror(errno)); - return -1; - } - - return 0; -} - -int jt_ptl_push_connection (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - lnet_nid_t nid = LNET_NID_ANY; - - if (argc > 2) { - fprintf(stderr, "usage: %s [nid]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible (argv[0], SOCKLND, 0)) - return -1; - - if (argc > 1 && - !libcfs_str2anynid(&nid, argv[1])) { - fprintf(stderr, "Can't parse nid: %s\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PUSH_CONNECTION, &data); - if (rc != 0) { - 
fprintf(stderr, "failed to push connection: %s\n", - strerror(errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_print_active_txs (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], QSWLND, 0)) - return -1; - - for (index = 0;;index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_TXDESC, &data); - if (rc != 0) - break; - - printf ("type %u payload %6d to %s via %s by pid %6d: " - "%s, %s, state %d\n", - data.ioc_u32[0], - data.ioc_count, - libcfs_nid2str(data.ioc_nid), - libcfs_nid2str(data.ioc_u64[0]), - data.ioc_u32[1], - (data.ioc_flags & 1) ? "delayed" : "immediate", - (data.ioc_flags & 2) ? "nblk" : "normal", - data.ioc_flags >> 2); - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no active descs>\n"); - } else { - fprintf(stderr, "Error getting active transmits list: " - "%s: check dmesg.\n", - strerror(errno)); - } - } - return 0; -} - -int jt_ptl_ping(int argc, char **argv) -{ - int rc; - int timeout; - lnet_process_id_t id; - lnet_process_id_t ids[16]; - int maxids = sizeof(ids)/sizeof(ids[0]); - struct libcfs_ioctl_data data; - char *sep; - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s id [timeout (secs)]\n", argv[0]); - return 0; - } - - sep = strchr(argv[1], '-'); - if (sep == NULL) { - id.pid = LNET_PID_ANY; - id.nid = libcfs_str2nid(argv[1]); - if (id.nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return -1; - } - } else { - char *end; - - if (argv[1][0] == 'u' || - argv[1][0] == 'U') - id.pid = strtoul(&argv[1][1], &end, 0) | LNET_PID_USERFLAG; - else - id.pid = strtoul(argv[1], &end, 0); - - id.nid = libcfs_str2nid(sep + 1); - - if (end != sep || - id.nid == LNET_NID_ANY) { - fprintf(stderr, "Can't parse process id \"%s\"\n", argv[1]); - return -1; - } - } - - if (argc > 2) - timeout = 1000 * atol(argv[2]); - else - timeout = 1000; /* 
default 1 second timeout */ - - LIBCFS_IOC_INIT (data); - data.ioc_nid = id.nid; - data.ioc_u32[0] = id.pid; - data.ioc_u32[1] = timeout; - data.ioc_plen1 = sizeof(ids); - data.ioc_pbuf1 = (char *)ids; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PING, &data); - if (rc != 0) { - fprintf(stderr, "failed to ping %s: %s\n", - id.pid == LNET_PID_ANY ? - libcfs_nid2str(id.nid) : libcfs_id2str(id), - strerror(errno)); - return -1; - } - - for (i = 0; i < data.ioc_count && i < maxids; i++) - printf("%s\n", libcfs_id2str(ids[i])); - - if (data.ioc_count > maxids) - printf("%d out of %d ids listed\n", maxids, data.ioc_count); - - return 0; -} - -int jt_ptl_mynid(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid; - int rc; - - if (argc != 2) { - fprintf(stderr, "usage: %s NID\n", argv[0]); - return 0; - } - - nid = libcfs_str2nid(argv[1]); - if (nid == LNET_NID_ANY) { - fprintf(stderr, "Can't parse NID '%s'\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = LNET_NIDNET(nid); - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_REGISTER_MYNID, &data); - if (rc < 0) - fprintf(stderr, "setting my NID failed: %s\n", - strerror(errno)); - else - printf("registered my nid %s\n", libcfs_nid2str(nid)); - - return 0; -} - -int -jt_ptl_fail_nid (int argc, char **argv) -{ - int rc; - lnet_nid_t nid; - unsigned int threshold; - struct libcfs_ioctl_data data; - - if (argc < 2 || argc > 3) - { - fprintf (stderr, "usage: %s nid|\"*\" [count (0 == mend)]\n", argv[0]); - return (0); - } - - if (!libcfs_str2anynid(&nid, argv[1])) - { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return (-1); - } - - if (argc < 3) { - threshold = LNET_MD_THRESH_INF; - } else if (sscanf (argv[2], "%i", &threshold) != 1) { - fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]); - return (-1); - } - - LIBCFS_IOC_INIT (data); - data.ioc_nid = nid; - data.ioc_count = threshold; - - rc = l_ioctl (LNET_DEV_ID, IOC_LIBCFS_FAIL_NID, &data); - if 
(rc < 0) - fprintf (stderr, "IOC_LIBCFS_FAIL_NID failed: %s\n", - strerror (errno)); - else - printf ("%s %s\n", threshold == 0 ? "Unfailing" : "Failing", argv[1]); - - return (0); -} - -int -jt_ptl_add_route (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t gateway_nid; - unsigned int hops = 1; - char *end; - int rc; - - if (argc < 2 || argc > 3) - { - fprintf (stderr, "usage: %s gateway [hopcount]\n", argv[0]); - return (0); - } - - if (!g_net_is_set(argv[0])) - return (-1); - - gateway_nid = libcfs_str2nid(argv[1]); - if (gateway_nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); - return (-1); - } - - if (argc == 3) { - hops = strtoul(argv[2], &end, 0); - if (hops >= 256 || *end != 0) { - fprintf (stderr, "Can't parse hopcount \"%s\"\n", argv[2]); - return -1; - } - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = hops; - data.ioc_nid = gateway_nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_ROUTE, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_ADD_ROUTE failed: %s\n", strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_del_route (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid; - int rc; - - if (argc != 2) { - fprintf (stderr, "usage: %s gatewayNID\n", argv[0]); - return (0); - } - - if (!libcfs_str2anynid(&nid, argv[1])) { - fprintf (stderr, "Can't parse gateway NID " - "\"%s\"\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net_set ? 
g_net : LNET_NIDNET(LNET_NID_ANY); - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_ROUTE, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_DEL_ROUTE (%s) failed: %s\n", - libcfs_nid2str(nid), strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_notify_router (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int enable; - lnet_nid_t nid; - int rc; - struct timeval now; - time_t when; - - if (argc < 3) - { - fprintf (stderr, "usage: %s targetNID <up/down> [<time>]\n", - argv[0]); - return (0); - } - - nid = libcfs_str2nid(argv[1]); - if (nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]); - return (-1); - } - - if (lnet_parse_bool (&enable, argv[2]) != 0) { - fprintf (stderr, "Can't parse boolean %s\n", argv[2]); - return (-1); - } - - gettimeofday(&now, NULL); - - if (argc < 4) { - when = now.tv_sec; - } else if (lnet_parse_time (&when, argv[3]) != 0) { - fprintf(stderr, "Can't parse time %s\n" - "Please specify either 'YYYY-MM-DD-HH:MM:SS'\n" - "or an absolute unix time in seconds\n", argv[3]); - return (-1); - } else if (when > now.tv_sec) { - fprintf (stderr, "%s specifies a time in the future\n", - argv[3]); - return (-1); - } - - LIBCFS_IOC_INIT(data); - data.ioc_nid = nid; - data.ioc_flags = enable; - /* Yeuch; 'cept I need a __u64 on 64 bit machines... 
*/ - data.ioc_u64[0] = (__u64)when; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_NOTIFY_ROUTER, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_NOTIFY_ROUTER (%s) failed: %s\n", - libcfs_nid2str(nid), strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_print_routes (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - int index; - __u32 net; - lnet_nid_t nid; - unsigned int hops; - int alive; - - for (index = 0;;index++) - { - LIBCFS_IOC_INIT(data); - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_ROUTE, &data); - if (rc != 0) - break; - - net = data.ioc_net; - hops = data.ioc_count; - nid = data.ioc_nid; - alive = data.ioc_flags; - - printf ("net %18s hops %u gw %32s %s\n", - libcfs_net2str(net), hops, - libcfs_nid2str(nid), alive ? "up" : "down"); - } - - if (errno != ENOENT) - fprintf(stderr, "Error getting routes: %s: check dmesg.\n", - strerror(errno)); - - return (0); -} - -static int -lwt_control(int enable, int clear) -{ - struct libcfs_ioctl_data data; - int rc; - - LIBCFS_IOC_INIT(data); - data.ioc_flags = (enable ? 1 : 0) | (clear ? 
2 : 0); - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_CONTROL, &data); - if (rc == 0) - return (0); - - fprintf(stderr, "IOC_LIBCFS_LWT_CONTROL failed: %s\n", - strerror(errno)); - return (-1); -} - -static int -lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, - lwt_event_t *events, int size) -{ - struct libcfs_ioctl_data data; - int rc; - - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = (char *)events; - data.ioc_plen1 = size; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_SNAPSHOT, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_SNAPSHOT failed: %s\n", - strerror(errno)); - return (-1); - } - - /* crappy overloads */ - if (data.ioc_u32[2] != sizeof(lwt_event_t) || - data.ioc_u32[3] != offsetof(lwt_event_t, lwte_where)) { - fprintf(stderr,"kernel/user LWT event mismatch %d(%d),%d(%d)\n", - (int)data.ioc_u32[2], (int)sizeof(lwt_event_t), - (int)data.ioc_u32[3], - (int)offsetof(lwt_event_t, lwte_where)); - return (-1); - } - - if (now != NULL) - *now = data.ioc_u64[0]; - - LASSERT (data.ioc_u32[0] != 0); - if (ncpu != NULL) - *ncpu = data.ioc_u32[0]; - - LASSERT (data.ioc_u32[1] != 0); - if (totalsize != NULL) - *totalsize = data.ioc_u32[1]; - - return (0); -} - -static char * -lwt_get_string(char *kstr) -{ - char *ustr; - struct libcfs_ioctl_data data; - int size; - int rc; - - /* FIXME: this could maintain a symbol table since we expect to be - * looking up the same strings all the time... 
*/ - - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = kstr; - data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */ - data.ioc_pbuf2 = NULL; - data.ioc_plen2 = 0; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n", - strerror(errno)); - return (NULL); - } - - size = data.ioc_count; - ustr = (char *)malloc(size); - if (ustr == NULL) { - fprintf(stderr, "Can't allocate string storage of size %d\n", - size); - return (NULL); - } - - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = kstr; - data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */ - data.ioc_pbuf2 = ustr; - data.ioc_plen2 = size; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n", - strerror(errno)); - return (NULL); - } - - LASSERT(strlen(ustr) == size - 1); - return (ustr); -} - -static void -lwt_put_string(char *ustr) -{ - free(ustr); -} - -static int -lwt_print(FILE *f, cycles_t t0, cycles_t tlast, double mhz, int cpu, lwt_event_t *e) -{ -#ifndef __WORDSIZE -# error "__WORDSIZE not defined" -#elif __WORDSIZE == 32 -# define XFMT "%#010lx" -#elif __WORDSIZE== 64 -# define XFMT "%#018lx" -#else -# error "Unexpected __WORDSIZE" -#endif - char *where = lwt_get_string(e->lwte_where); - - if (where == NULL) - return (-1); - - fprintf(f, XFMT" "XFMT" "XFMT" "XFMT": "XFMT" %2d %10.6f %10.2f %s\n", - e->lwte_p1, e->lwte_p2, e->lwte_p3, e->lwte_p4, - (long)e->lwte_task, cpu, (e->lwte_when - t0) / (mhz * 1000000.0), - (t0 == e->lwte_when) ? 
0.0 : (e->lwte_when - tlast) / mhz, - where); - - lwt_put_string(where); - - return (0); -#undef XFMT -} - -double -get_cycles_per_usec () -{ - FILE *f = fopen ("/proc/cpuinfo", "r"); - double mhz; - char line[64]; - - if (f != NULL) { - while (fgets (line, sizeof (line), f) != NULL) - if (sscanf (line, "cpu MHz : %lf", &mhz) == 1) { - fclose (f); - return (mhz); - } - fclose (f); - } - - fprintf (stderr, "Can't read/parse /proc/cpuinfo\n"); - return (1000.0); -} - -int -jt_ptl_lwt(int argc, char **argv) -{ - const int lwt_max_cpus = 32; - int ncpus; - int totalspace; - int nevents_per_cpu; - lwt_event_t *events; - lwt_event_t *cpu_event[lwt_max_cpus + 1]; - lwt_event_t *next_event[lwt_max_cpus]; - lwt_event_t *first_event[lwt_max_cpus]; - int cpu; - lwt_event_t *e; - int rc; - int i; - double mhz; - cycles_t t0; - cycles_t tlast; - cycles_t tnow; - struct timeval tvnow; - int printed_date = 0; - int nlines = 0; - FILE *f = stdout; - - if (argc < 2 || - (strcmp(argv[1], "start") && - strcmp(argv[1], "stop"))) { - fprintf(stderr, - "usage: %s start\n" - " %s stop [fname]\n", argv[0], argv[0]); - return (-1); - } - - if (!strcmp(argv[1], "start")) { - /* disable */ - if (lwt_control(0, 0) != 0) - return (-1); - - /* clear */ - if (lwt_control(0, 1) != 0) - return (-1); - - /* enable */ - if (lwt_control(1, 0) != 0) - return (-1); - - return (0); - } - - if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0) - return (-1); - - if (ncpus > lwt_max_cpus) { - fprintf(stderr, "Too many cpus: %d (%d)\n", - ncpus, lwt_max_cpus); - return (-1); - } - - events = (lwt_event_t *)malloc(totalspace); - if (events == NULL) { - fprintf(stderr, "Can't allocate %d\n", totalspace); - return (-1); - } - - if (lwt_control(0, 0) != 0) { /* disable */ - free(events); - return (-1); - } - - if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) { - free(events); - return (-1); - } - - /* we want this time to be sampled at snapshot time */ - gettimeofday(&tvnow, NULL); - - if (argc > 
2) { - f = fopen (argv[2], "w"); - if (f == NULL) { - fprintf(stderr, "Can't open %s for writing: %s\n", argv[2], strerror (errno)); - free(events); - return (-1); - } - } - - mhz = get_cycles_per_usec(); - - /* carve events into per-cpu slices */ - nevents_per_cpu = totalspace / (ncpus * sizeof(lwt_event_t)); - for (cpu = 0; cpu <= ncpus; cpu++) - cpu_event[cpu] = &events[cpu * nevents_per_cpu]; - - /* find the earliest event on each cpu */ - for (cpu = 0; cpu < ncpus; cpu++) { - first_event[cpu] = NULL; - - for (e = cpu_event[cpu]; e < cpu_event[cpu + 1]; e++) { - - if (e->lwte_where == NULL) /* not an event */ - continue; - - if (first_event[cpu] == NULL || - first_event[cpu]->lwte_when > e->lwte_when) - first_event[cpu] = e; - } - - next_event[cpu] = first_event[cpu]; - } - - t0 = tlast = 0; - for (cpu = 0; cpu < ncpus; cpu++) { - e = first_event[cpu]; - if (e == NULL) /* no events this cpu */ - continue; - - if (e == cpu_event[cpu]) - e = cpu_event[cpu + 1] - 1; - else - e = e - 1; - - /* If there's an event immediately before the first one, this - * cpu wrapped its event buffer */ - if (e->lwte_where == NULL) - continue; - - /* We should only start outputting events from the most recent - * first event in any wrapped cpu. Events before this time on - * other cpus won't have any events from this CPU to interleave - * with. 
*/ - if (t0 < first_event[cpu]->lwte_when) - t0 = first_event[cpu]->lwte_when; - } - - for (;;) { - /* find which cpu has the next event */ - cpu = -1; - for (i = 0; i < ncpus; i++) { - - if (next_event[i] == NULL) /* this cpu exhausted */ - continue; - - if (cpu < 0 || - next_event[i]->lwte_when < next_event[cpu]->lwte_when) - cpu = i; - } - - if (cpu < 0) /* all cpus exhausted */ - break; - - if (t0 == 0) { - /* no wrapped cpus and this is he first ever event */ - t0 = next_event[cpu]->lwte_when; - } - - if (t0 <= next_event[cpu]->lwte_when) { - /* on or after the first event */ - if (!printed_date) { - cycles_t du = (tnow - t0) / mhz; - time_t then = tvnow.tv_sec - du/1000000; - - if (du % 1000000 > tvnow.tv_usec) - then--; - - fprintf(f, "%s", ctime(&then)); - printed_date = 1; - } - - rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); - if (rc != 0) - break; - - if (++nlines % 10000 == 0 && f != stdout) { - /* show some activity... */ - printf("."); - fflush (stdout); - } - } - - tlast = next_event[cpu]->lwte_when; - - next_event[cpu]++; - if (next_event[cpu] == cpu_event[cpu + 1]) - next_event[cpu] = cpu_event[cpu]; - - if (next_event[cpu]->lwte_where == NULL || - next_event[cpu] == first_event[cpu]) - next_event[cpu] = NULL; - } - - if (f != stdout) { - printf("\n"); - fclose(f); - } - - free(events); - return (0); -} - -int jt_ptl_memhog(int argc, char **argv) -{ - static int gfp = 0; /* sticky! 
*/ - - struct libcfs_ioctl_data data; - int rc; - int count; - char *end; - - if (argc < 2) { - fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]); - return 0; - } - - count = strtol(argv[1], &end, 0); - if (count < 0 || *end != 0) { - fprintf(stderr, "Can't parse page count '%s'\n", argv[1]); - return -1; - } - - if (argc >= 3) { - rc = strtol(argv[2], &end, 0); - if (*end != 0) { - fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); - return -1; - } - gfp = rc; - } - - LIBCFS_IOC_INIT(data); - data.ioc_count = count; - data.ioc_flags = gfp; - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_MEMHOG, &data); - - if (rc != 0) { - fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno)); - return -1; - } - - printf("memhog %d OK\n", count); - return 0; -} - -int jt_ptl_testprotocompat(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - int flags; - char *end; - - if (argc < 2) { - fprintf(stderr, "usage: %s <number>\n", argv[0]); - return 0; - } - - flags = strtol(argv[1], &end, 0); - if (flags < 0 || *end != 0) { - fprintf(stderr, "Can't parse flags '%s'\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_flags = flags; - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_TESTPROTOCOMPAT, &data); - - if (rc != 0) { - fprintf(stderr, "test proto compat %x failed: %s\n", - flags, strerror(errno)); - return -1; - } - - printf("test proto compat %x OK\n", flags); - return 0; -} - - diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c deleted file mode 100644 index f6b7f4009287545d6a332d9be3e88a61f95e0ea4..0000000000000000000000000000000000000000 --- a/lnet/utils/ptlctl.c +++ /dev/null @@ -1,76 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> - -#include "parser.h" - - -command_t list[] = { - {"network", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"}, - {"net", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"}, - {"list_nids", jt_ptl_list_nids, 0,"list local NIDs"}, - {"which_nid", jt_ptl_which_nid, 0,"select the closest NID"}, - {"print_interfaces", jt_ptl_print_interfaces, 0, "print interface entries (no args)"}, - {"add_interface", jt_ptl_add_interface, 0, "add interface entry (args: ip [netmask])"}, - {"del_interface", jt_ptl_del_interface, 0, "delete interface entries (args: [ip])"}, - {"print_peers", jt_ptl_print_peers, 0, "print peer entries (no args)"}, - {"add_peer", jt_ptl_add_peer, 0, "add peer entry (args: nid host port)"}, - {"del_peer", jt_ptl_del_peer, 0, "delete peer entry (args: [nid] [host])"}, - {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"}, - {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"}, - {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"}, - {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"}, - {"ping", 
jt_ptl_ping, 0, "ping (args: nid [timeout] [pid])"}, - {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, - {"add_route", jt_ptl_add_route, 0, - "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"}, - {"del_route", jt_ptl_del_route, 0, - "delete all routes via a gateway from the routing table (args: gatewayNID"}, - {"set_route", jt_ptl_notify_router, 0, - "enable/disable a route in the routing table (args: gatewayNID up/down [time]"}, - {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"}, - {"testprotocompat", jt_ptl_testprotocompat, 0, "usage: testprotocompat count"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (ptl_initialize(argc, argv) < 0) - exit(1); - - Parser_init("ptlctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - return 0; -} diff --git a/lnet/utils/routerstat.c b/lnet/utils/routerstat.c deleted file mode 100644 index 0b4e5135539fabf0fdad7e63bb49d2a7bf73f6b8..0000000000000000000000000000000000000000 --- a/lnet/utils/routerstat.c +++ /dev/null @@ -1,158 +0,0 @@ -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/time.h> - -double -timenow () -{ - struct timeval tv; - - gettimeofday (&tv, NULL); - return (tv.tv_sec + tv.tv_usec / 1000000.0); -} - -typedef struct { - unsigned long msgs_alloc; - unsigned long msgs_max; - unsigned long errors; - unsigned long send_count; - unsigned long recv_count; - unsigned long route_count; - unsigned long drop_count; - unsigned long long send_length; - unsigned long long recv_length; - unsigned long 
long route_length; - unsigned long long drop_length; -} counters_t; - -unsigned long long subull(unsigned long long a, unsigned long long b) -{ - if (a < b) - return -1ULL - b + a + 1; - - return a - b; -} - -unsigned long long subul(unsigned long a, unsigned long b) -{ - if (a < b) - return -1UL - b + a + 1; - - return a - b; -} - -double rul(unsigned long a, double secs) -{ - return (double)a/secs; -} - -double rull(unsigned long long a, double secs) -{ - return (double)a/secs; -} - -void -do_stat (int fd) -{ - static char buffer[1024]; - static double last = 0.0; - static counters_t old_counter; - double now; - double t; - counters_t new_counter; - counters_t counter; - int n; - - lseek (fd, 0, SEEK_SET); - now = timenow(); - n = read (fd, buffer, sizeof (buffer)); - if (n < 0) - { - fprintf (stderr, "Can't read statfile\n"); - exit (1); - } - buffer[n] = 0; - - n = sscanf (buffer, "%lu %lu %lu %lu %lu %lu %lu %Lu %Lu %Lu %Lu", - &new_counter.msgs_alloc, &new_counter.msgs_max, - &new_counter.errors, - &new_counter.send_count, &new_counter.recv_count, - &new_counter.route_count, &new_counter.drop_count, - &new_counter.send_length, &new_counter.recv_length, - &new_counter.route_length, &new_counter.drop_length); - if (n < 11) - { - fprintf (stderr, "Can't parse statfile\n"); - exit (1); - } - - if (last == 0.0) { - printf ("M %lu(%lu) E %lu S %lu/%llu R %lu/%llu F %lu/%llu D %lu/%llu\n", - new_counter.msgs_alloc, new_counter.msgs_max, - new_counter.errors, - new_counter.send_count, new_counter.send_length, - new_counter.recv_count, new_counter.recv_length, - new_counter.route_count, new_counter.route_length, - new_counter.drop_count, new_counter.drop_length); - } else { - t = now - last; - - counter.msgs_alloc = new_counter.msgs_alloc; - counter.msgs_max = new_counter.msgs_max; - - counter.errors = subul(new_counter.errors, old_counter.errors); - counter.send_count = subul(new_counter.send_count, old_counter.send_count); - counter.recv_count = 
subul(new_counter.recv_count, old_counter.recv_count); - counter.route_count = subul(new_counter.route_count, old_counter.route_count); - counter.drop_count = subul(new_counter.drop_count, old_counter.drop_count); - counter.send_length = subull(new_counter.send_length, old_counter.send_length); - counter.recv_length = subull(new_counter.recv_length, old_counter.recv_length); - counter.route_length = subull(new_counter.route_length, old_counter.route_length); - counter.drop_length = subull(new_counter.drop_length, old_counter.drop_length); - - printf ("M %3lu(%3lu) E %0.0f S %7.2f/%6.0f R %7.2f/%6.0f F %7.2f/%6.0f D %4.2f/%0.0f\n", - counter.msgs_alloc, counter.msgs_max, - rul(counter.errors,t), - rull(counter.send_length,t*1024.0*1024.0), rul(counter.send_count, t), - rull(counter.recv_length,t*1024.0*1024.0), rul(counter.recv_count, t), - rull(counter.route_length,t*1024.0*1024.0), rul(counter.route_count, t), - rull(counter.drop_length,t*1024.0*1024.0), rul(counter.drop_count, t)); - } - - old_counter = new_counter; - fflush (stdout); - - lseek (fd, 0, SEEK_SET); - last = timenow(); -} - -int main (int argc, char **argv) -{ - int interval = 0; - int fd; - - if (argc > 1) - interval = atoi (argv[1]); - - fd = open ("/proc/sys/lnet/stats", O_RDONLY); - if (fd < 0) - { - fprintf (stderr, "Can't open stat: %s\n", strerror (errno)); - return (1); - } - - do_stat (fd); - if (interval == 0) - return (0); - - for (;;) - { - sleep (interval); - do_stat (fd); - } -} diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c deleted file mode 100644 index 9590b8b9ab196e4caca2b9ebb278c34108c050eb..0000000000000000000000000000000000000000 --- a/lnet/utils/wirecheck.c +++ /dev/null @@ -1,213 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include <stdio.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <lnet/lib-lnet.h> - -#include <string.h> - -#ifndef HAVE_STRNLEN -#define strnlen(s, i) 
strlen(s) -#endif - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" CLASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE((int)offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -check_lnet_handle_wire (void) -{ - CHECK_STRUCT (lnet_handle_wire_t); - CHECK_MEMBER (lnet_handle_wire_t, wh_interface_cookie); - CHECK_MEMBER (lnet_handle_wire_t, wh_object_cookie); -} - -void -check_lnet_magicversion (void) -{ - CHECK_STRUCT (lnet_magicversion_t); - CHECK_MEMBER (lnet_magicversion_t, magic); - CHECK_MEMBER (lnet_magicversion_t, version_major); - CHECK_MEMBER (lnet_magicversion_t, version_minor); -} - -void -check_lnet_hdr (void) -{ - CHECK_STRUCT (lnet_hdr_t); - CHECK_MEMBER (lnet_hdr_t, dest_nid); - CHECK_MEMBER (lnet_hdr_t, src_nid); - CHECK_MEMBER (lnet_hdr_t, dest_pid); - CHECK_MEMBER (lnet_hdr_t, src_pid); - CHECK_MEMBER (lnet_hdr_t, type); - CHECK_MEMBER (lnet_hdr_t, payload_length); - CHECK_MEMBER (lnet_hdr_t, msg); - - BLANK_LINE (); - COMMENT ("Ack"); - CHECK_MEMBER (lnet_hdr_t, msg.ack.dst_wmd); - CHECK_MEMBER (lnet_hdr_t, msg.ack.match_bits); - CHECK_MEMBER (lnet_hdr_t, msg.ack.mlength); - - BLANK_LINE (); - COMMENT ("Put"); - CHECK_MEMBER (lnet_hdr_t, msg.put.ack_wmd); - CHECK_MEMBER (lnet_hdr_t, msg.put.match_bits); - CHECK_MEMBER (lnet_hdr_t, msg.put.hdr_data); - CHECK_MEMBER (lnet_hdr_t, 
msg.put.ptl_index); - CHECK_MEMBER (lnet_hdr_t, msg.put.offset); - - BLANK_LINE (); - COMMENT ("Get"); - CHECK_MEMBER (lnet_hdr_t, msg.get.return_wmd); - CHECK_MEMBER (lnet_hdr_t, msg.get.match_bits); - CHECK_MEMBER (lnet_hdr_t, msg.get.ptl_index); - CHECK_MEMBER (lnet_hdr_t, msg.get.src_offset); - CHECK_MEMBER (lnet_hdr_t, msg.get.sink_length); - - BLANK_LINE (); - COMMENT ("Reply"); - CHECK_MEMBER (lnet_hdr_t, msg.reply.dst_wmd); - - BLANK_LINE (); - COMMENT ("Hello"); - CHECK_MEMBER (lnet_hdr_t, msg.hello.incarnation); - CHECK_MEMBER (lnet_hdr_t, msg.hello.type); -} - -void -system_string (char *cmdline, char *str, int len) -{ - int fds[2]; - int rc; - pid_t pid; - - rc = pipe (fds); - if (rc != 0) - abort (); - - pid = fork (); - if (pid == 0) { - /* child */ - int fd = fileno(stdout); - - rc = dup2(fds[1], fd); - if (rc != fd) - abort(); - - exit(system(cmdline)); - /* notreached */ - } else if ((int)pid < 0) { - abort(); - } else { - FILE *f = fdopen (fds[0], "r"); - - if (f == NULL) - abort(); - - close(fds[1]); - - if (fgets(str, len, f) == NULL) - abort(); - - if (waitpid(pid, &rc, 0) != pid) - abort(); - - if (!WIFEXITED(rc) || - WEXITSTATUS(rc) != 0) - abort(); - - if (strnlen(str, len) == len) - str[len - 1] = 0; - - if (str[strlen(str) - 1] == '\n') - str[strlen(str) - 1] = 0; - - fclose(f); - } -} - -int -main (int argc, char **argv) -{ - char unameinfo[256]; - char gccinfo[256]; - - system_string("uname -a", unameinfo, sizeof(unameinfo)); - system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - - printf ("void lnet_assert_wire_constants (void)\n" - "{\n" - " /* Wire protocol assertions generated by 'wirecheck'\n" - " * running on %s\n" - " * with %s */\n" - "\n", unameinfo, gccinfo); - - BLANK_LINE (); - - COMMENT ("Constants..."); - - CHECK_DEFINE (LNET_PROTO_OPENIB_MAGIC); - CHECK_DEFINE (LNET_PROTO_RA_MAGIC); - - CHECK_DEFINE (LNET_PROTO_TCP_MAGIC); - CHECK_DEFINE (LNET_PROTO_TCP_VERSION_MAJOR); - CHECK_DEFINE 
(LNET_PROTO_TCP_VERSION_MINOR); - - CHECK_VALUE (LNET_MSG_ACK); - CHECK_VALUE (LNET_MSG_PUT); - CHECK_VALUE (LNET_MSG_GET); - CHECK_VALUE (LNET_MSG_REPLY); - CHECK_VALUE (LNET_MSG_HELLO); - - check_lnet_handle_wire (); - check_lnet_magicversion (); - check_lnet_hdr (); - - printf ("}\n\n"); - - return (0); -}